├── .gitignore ├── LICENSE ├── README.md ├── common └── __init__.py ├── global_config.py ├── gradio_server.py ├── human_simulator.py ├── imgs └── webui-snapshot.png ├── init_prompt.json ├── misc ├── ali_pay.png ├── placeholder └── wechat_pay.png ├── models ├── __init__.py ├── aquila_fa.py ├── aquila_hf.py ├── baichuan_hf.py ├── chatglm_hf.py ├── falcon_hf.py └── vicuna_bin.py ├── prompts ├── __init__.py ├── chatgpt_query.py ├── human_simulator.py ├── llm_query.py └── service_init.py ├── recurrent_llm.py ├── recurrentgpt.py ├── requirements.txt └── utils ├── __init__.py ├── aquila_util.py ├── baichuan_util.py ├── chatglm_util.py ├── falcon_util.py ├── openai_util.py └── vicuna_util.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ 161 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 MK 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## **Recurrent-LLM** 2 | The Open Source LLM implementation of paper: 3 | 4 | **RecurrentGPT: Interactive Generation of (Arbitrarily) Long Text**. 5 | 6 | [[Paper](https://arxiv.org/pdf/2305.13304v1.pdf)] [[arxiv](https://arxiv.org/abs/2305.13304v1)] [[HuggingFace](https://huggingface.co/papers/2305.13304)] [[Offical](https://github.com/aiwaves-cn/RecurrentGPT)] 7 | 8 | The fixed-size context of Transformer makes GPT models incapable of generating arbitrarily long text. In this paper, we introduce RecurrentGPT, a language-based simulacrum of the recurrence mechanism in RNNs. RecurrentGPT is built upon a large language model (LLM) such as ChatGPT and uses natural language to simulate the Long Short-Term Memory mechanism in an LSTM. 
At each timestep, RecurrentGPT generates a paragraph of text and updates its language-based long-short term memory stored on the hard drive and the prompt, respectively. This recurrence mechanism enables RecurrentGPT to generate texts of arbitrary length without forgetting. Since human users can easily observe and edit the natural language memories, RecurrentGPT is interpretable and enables interactive generation of long text. RecurrentGPT is an initial step towards next-generation computer-assisted writing systems beyond local editing suggestions. In addition to producing AI-generated content (AIGC), we also demonstrate the possibility of using RecurrentGPT as an interactive fiction that directly interacts with consumers. We call this usage of generative models by ``AI As Contents'' (AIAC), which we believe is the next form of conventional AIGC. We further demonstrate the possibility of using RecurrentGPT to create personalized interactive fiction that directly interacts with readers instead of interacting with writers. More broadly, RecurrentGPT demonstrates the utility of borrowing ideas from popular model designs in cognitive science and deep learning for prompting LLMs. 9 | 10 | Transformer的固定尺寸上下文使得GPT模型无法生成任意长的文本。在本文中,我们介绍了RecurrentGPT,一个基于语言的模拟RNNs中的递归机制。RecurrentGPT建立在大型语言模型(LLM)之上,如ChatGPT,并使用自然语言来模拟LSTM中的长短时记忆机制。在每个时间段,RecurrentGPT生成一段文字,并更新其基于语言的长短时记忆,分别存储在硬盘和提示器上。这种递归机制使RecurrentGPT能够生成任意长度的文本而不被遗忘。由于人类用户可以很容易地观察和编辑自然语言记忆,因此RecurrentGPT是可解释的,并能互动地生成长文本。RecurrentGPT是朝着超越本地编辑建议的下一代计算机辅助写作系统迈出的第一步。除了制作人工智能生成的内容(AIGC),我们还展示了使用RecurrentGPT作为直接与消费者互动的互动小说的可能性。我们称这种生成模型的使用为 "AI As Contents"(AIAC),我们认为这是传统AIGC的下一个形式。我们进一步展示了使用RecurrentGPT创造个性化互动小说的可能性,这种小说直接与读者互动,而不是与作者互动。更广泛地说,RecurrentGPT证明了从认知科学和深度学习中流行的模型设计中借用思想来提示LLM的效用。 11 | 12 | --- 13 | 14 | ## **Table of Contents** 15 | - [**Recurrent-LLM**](#recurrent-llm) 16 | - [**Table of Contents**](#table-of-contents) 17 | - [**Requirements**](#requirements) 18 | - [**Configuration**](#configuration) 19 | - [**Global Config**](#global-config) 20 | - [**Supported LLM options**](#supported-llm-options) 21 | - [**OpenAI ChatGPT**](#openai-chatgpt) 22 | - [**Vicuna**](#vicuna) 23 | - [**ChatGLM**](#chatglm) 24 | - [**Baichuan**](#baichuan) 25 | - [**Aquila**](#aquila) 26 | - [**Falcon**](#falcon) 27 | - [**Usage**](#usage) 28 | - [**start web server**](#start-web-server) 29 | - [**WebUI**](#webui) 30 | - [**Star-History**](#star-history) 31 | - [**License**](#license) 32 | 33 | ## **Requirements** 34 | 35 | ``` 36 | pip install transformers@git+https://github.com/huggingface/transformers.git 37 | pip install peft@git+https://github.com/huggingface/peft.git 38 | pip install accelerate@git+https://github.com/huggingface/accelerate.git 39 | pip install bitsandbytes==0.39.0 40 | 41 | pip install -U flagai 42 | pip install bminf 43 | ``` 44 | 45 | ## **Configuration** 46 | 47 | ### **Global Config** 48 | [[global_config.py](./global_config.py)] 49 | 50 | ``` 51 | lang_opt = "zh" # zh or en. make English or Chinese Novel 52 | llm_model_opt = "openai" # default is openai, it also can be other open-source LLMs as below 53 | ``` 54 | 55 | ### **Supported LLM options** 56 | 57 | - [x] openai 58 | - [x] vicuna 59 | - [x] chatglm 60 | - [x] baichuan 61 | - [x] aquila 62 | - [x] falcon 63 | 64 | #### **OpenAI ChatGPT** 65 | 66 | you should apply an openai api key first. then 67 | ``` 68 | export OPENAI_API_KEY = "your key" 69 | ``` 70 | 71 | #### **Vicuna** 72 | 73 | download vicuna model. 
and configure it in [models/vicuna_bin.py](models/vicuna_bin.py) 74 | 75 | #### **ChatGLM** 76 | 77 | ```python 78 | tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, trust_remote_code=True) 79 | model_config = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=True) 80 | model = AutoModel.from_pretrained(model_name_or_path, config=model_config, trust_remote_code=True) 81 | ``` 82 | 83 | #### **Baichuan** 84 | 85 | ```python 86 | tokenizer = AutoTokenizer.from_pretrained("baichuan-inc/baichuan-7B", trust_remote_code=True) 87 | model = AutoModelForCausalLM.from_pretrained("baichuan-inc/baichuan-7B", device_map="auto", trust_remote_code=True) 88 | ``` 89 | 90 | #### **Aquila** 91 | 92 | ```python 93 | loader = AutoLoader( 94 | "lm", 95 | model_dir=state_dict, 96 | model_name=model_name, 97 | use_cache=True, 98 | fp16=True) 99 | model = loader.get_model() 100 | tokenizer = loader.get_tokenizer() 101 | model.eval() 102 | ``` 103 | If you want to use bminf, add the code below: 104 | ```python 105 | with torch.cuda.device(0): 106 | model = bminf.wrapper(model, quantization=False, memory_limit=2 << 30) 107 | ``` 108 | 109 | 110 | #### **Falcon** 111 | See [models/falcon_hf.py](models/falcon_hf.py) for loading Falcon, either the GPTQ-quantized checkpoint (`TheBloke/falcon-7b-instruct-GPTQ`) or the standard Hugging Face checkpoint (`tiiuae/falcon-7b`). 112 | 113 | ## **Usage** 114 | 115 | ### **start web server** 116 | 117 | ``` 118 | python gradio_server.py 119 | ``` 120 | 121 | 122 | ## **WebUI** 123 | 124 | ![webui-snapshot](./imgs/webui-snapshot.png) 125 | 126 | ------ 127 | ## **Star-History** 128 | 129 | ![star-history](https://api.star-history.com/svg?repos=jackaduma/Recurrent-LLM&type=Date "star-history") 130 | 131 | ------ 132 | 133 | ## Donation 134 | If this project helps you reduce development time, you can buy me a cup of coffee :) 135 | 136 | AliPay(支付宝) 137 |
138 | ![ali_pay](./misc/ali_pay.png) 139 |
140 | 141 | WechatPay(微信) 142 |
143 | ![wechat_pay](./misc/wechat_pay.png) 144 |
145 | 146 | ------ 147 | 148 | ## **License** 149 | 150 | [MIT](LICENSE) © Kun 151 | -------------------------------------------------------------------------------- /common/__init__.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | 6 | import torch 7 | 8 | def torch_gc(): 9 | if torch.cuda.is_available(): 10 | # with torch.cuda.device(DEVICE): 11 | torch.cuda.empty_cache() 12 | torch.cuda.ipc_collect() 13 | elif torch.backends.mps.is_available(): 14 | try: 15 | from torch.mps import empty_cache 16 | empty_cache() 17 | except Exception as e: 18 | print(e) 19 | print("如果您使用的是 macOS 建议将 pytorch 版本升级至 2.0.0 或更高版本,以支持及时清理 torch 产生的内存占用。") -------------------------------------------------------------------------------- /global_config.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | #################################################### 6 | 7 | # lang_opt = "zh1" 8 | lang_opt = "zh2" 9 | # lang_opt = "en" 10 | 11 | #################################################### 12 | 13 | # llm_model_opt = "openai" 14 | # llm_model_opt = "vicuna" 15 | llm_model_opt = "chatglm" 16 | # llm_model_opt = "baichuan" 17 | # llm_model_opt = "aquila" 18 | # llm_model_opt = "falcon" 19 | 20 | #################################################### -------------------------------------------------------------------------------- /gradio_server.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | import gradio as gr 6 | import random 7 | from sentence_transformers import SentenceTransformer 8 | from human_simulator import Human 9 | from prompts.service_init import get_init_prompt 10 | from utils import get_init, parse_instructions 11 | from global_config import lang_opt, llm_model_opt 12 | 13 | if "openai" == llm_model_opt: 14 | from recurrentgpt import RecurrentGPT as AIWriter 15 | llm_model = None 16 | llm_tokenizer = None 17 | 18 | elif "vicuna" == llm_model_opt: 19 | from recurrent_llm import RecurrentLLM as AIWriter 20 | from models.vicuna_bin import load_model 21 | llm_tokenizer, llm_model = load_model() 22 | 23 | elif "chatglm" == llm_model_opt: 24 | from recurrent_llm import RecurrentLLM as AIWriter 25 | from models.chatglm_hf import load_model 26 | llm_tokenizer, llm_model = load_model() 27 | 28 | elif "baichuan" == llm_model_opt: 29 | from recurrent_llm import RecurrentLLM as AIWriter 30 | from models.baichuan_hf import load_model 31 | llm_tokenizer, llm_model = load_model() 32 | 33 | elif "aquila" == llm_model_opt: 34 | from recurrent_llm import RecurrentLLM as AIWriter 35 | from models.aquila_fa import load_model 36 | # from models.aquila_hf import load_model 37 | llm_tokenizer, llm_model = load_model() 38 | 39 | elif "falcon" == llm_model_opt: 40 | from recurrent_llm import RecurrentLLM 41 | from models.falcon_hf import load_model 42 | llm_tokenizer, llm_model = load_model() 43 | 44 | else: 45 | raise Exception("not supported llm model name: {}".format(llm_model_opt)) 46 | 47 | # from urllib.parse import quote_plus 48 | # from pymongo import MongoClient 49 | 50 | # uri = "mongodb://%s:%s@%s" % (quote_plus("xxx"), 51 | # quote_plus("xxx"), "localhost") 52 | # client = MongoClient(uri, maxPoolSize=None) 53 | # db = client.recurrentGPT_db 54 | # log = db.log 55 | 56 | _CACHE = {} 57 | 58 | 59 | # Build the 
semantic search model 60 | embedder = SentenceTransformer('multi-qa-mpnet-base-cos-v1') 61 | 62 | 63 | def init_prompt(novel_type, description): 64 | if description == "": 65 | description = "" 66 | else: 67 | description = " about " + description 68 | 69 | return get_init_prompt(lang_opt, novel_type, description) 70 | 71 | 72 | def init(novel_type, description, request: gr.Request): 73 | if novel_type == "": 74 | novel_type = "Science Fiction" if "en" == lang_opt else "科幻故事" 75 | global _CACHE 76 | cookie = request.headers['cookie'] 77 | cookie = cookie.split('; _gat_gtag')[0] 78 | # prepare first init 79 | init_paragraphs = get_init(text=init_prompt( 80 | novel_type, description), model=llm_model, tokenizer=llm_tokenizer) 81 | # print(init_paragraphs) 82 | start_input_to_human = { 83 | 'output_paragraph': init_paragraphs['Paragraph 3'], 84 | 'input_paragraph': '\n\n'.join([init_paragraphs['Paragraph 1'], init_paragraphs['Paragraph 2'], init_paragraphs['Paragraph 3']]), 85 | 'output_memory': init_paragraphs['Summary'], 86 | "output_instruction": [init_paragraphs['Instruction 1'], init_paragraphs['Instruction 2'], init_paragraphs['Instruction 3']] 87 | } 88 | 89 | _CACHE[cookie] = {"start_input_to_human": start_input_to_human, 90 | "init_paragraphs": init_paragraphs} 91 | written_paras = f"""Title: {init_paragraphs['name']} 92 | 93 | Outline: {init_paragraphs['Outline']} 94 | 95 | Paragraphs: 96 | 97 | {start_input_to_human['input_paragraph']}""" if "en" == lang_opt else f"""标题: {init_paragraphs['name']} 98 | 99 | 梗概: {init_paragraphs['Outline']} 100 | 101 | 段落: 102 | 103 | {start_input_to_human['input_paragraph']}""" 104 | long_memory = parse_instructions( 105 | [init_paragraphs['Paragraph 1'], init_paragraphs['Paragraph 2'], init_paragraphs['Paragraph 3']]) 106 | # short memory, long memory, current written paragraphs, 3 next instructions 107 | return start_input_to_human['output_memory'], long_memory, written_paras, init_paragraphs['Instruction 1'], init_paragraphs['Instruction 2'], init_paragraphs['Instruction 3'] 108 | 109 | 110 | def step(short_memory, long_memory, instruction1, instruction2, instruction3, current_paras, request: gr.Request, ): 111 | if current_paras == "": 112 | return "", "", "", "", "", "" 113 | global _CACHE 114 | # print(list(_CACHE.keys())) 115 | # print(request.headers.get('cookie')) 116 | cookie = request.headers['cookie'] 117 | cookie = cookie.split('; _gat_gtag')[0] 118 | cache = _CACHE[cookie] 119 | 120 | if "writer" not in cache: 121 | start_input_to_human = cache["start_input_to_human"] 122 | start_input_to_human['output_instruction'] = [ 123 | instruction1, instruction2, instruction3] 124 | init_paragraphs = cache["init_paragraphs"] 125 | human = Human(input=start_input_to_human, 126 | memory=None, embedder=embedder, model=llm_model, tokenizer=llm_tokenizer) 127 | human.step() 128 | start_short_memory = init_paragraphs['Summary'] 129 | writer_start_input = human.output 130 | 131 | # Init writerGPT 132 | writer = AIWriter(input=writer_start_input, short_memory=start_short_memory, long_memory=[ 133 | init_paragraphs['Paragraph 1'], init_paragraphs['Paragraph 2'], init_paragraphs['Paragraph 3']], memory_index=None, embedder=embedder, 134 | model=llm_model, tokenizer=llm_tokenizer) 135 | cache["writer"] = writer 136 | cache["human"] = human 137 | writer.step() 138 | else: 139 | human = cache["human"] 140 | writer = cache["writer"] 141 | output = writer.output 142 | output['output_memory'] = short_memory 143 | # randomly select one instruction out of three 
144 | instruction_index = random.randint(0, 2) 145 | output['output_instruction'] = [instruction1, 146 | instruction2, instruction3][instruction_index] 147 | human.input = output 148 | human.step() 149 | writer.input = human.output 150 | writer.step() 151 | 152 | long_memory = [[v] for v in writer.long_memory] 153 | # short memory, long memory, current written paragraphs, 3 next instructions 154 | return writer.output['output_memory'], long_memory, current_paras + '\n\n' + writer.output['input_paragraph'], human.output['output_instruction'], *writer.output['output_instruction'] 155 | 156 | 157 | def controled_step(short_memory, long_memory, selected_instruction, current_paras, request: gr.Request, ): 158 | if current_paras == "": 159 | return "", "", "", "", "", "" 160 | global _CACHE 161 | # print(list(_CACHE.keys())) 162 | # print(request.headers.get('cookie')) 163 | cookie = request.headers['cookie'] 164 | cookie = cookie.split('; _gat_gtag')[0] 165 | cache = _CACHE[cookie] 166 | if "writer" not in cache: 167 | start_input_to_human = cache["start_input_to_human"] 168 | start_input_to_human['output_instruction'] = selected_instruction 169 | init_paragraphs = cache["init_paragraphs"] 170 | human = Human(input=start_input_to_human, 171 | memory=None, embedder=embedder, model=llm_model, tokenizer=llm_tokenizer) 172 | human.step() 173 | start_short_memory = init_paragraphs['Summary'] 174 | writer_start_input = human.output 175 | 176 | # Init writerGPT 177 | writer = AIWriter(input=writer_start_input, short_memory=start_short_memory, long_memory=[ 178 | init_paragraphs['Paragraph 1'], init_paragraphs['Paragraph 2'], init_paragraphs['Paragraph 3']], memory_index=None, embedder=embedder, 179 | model=llm_model, tokenizer=llm_tokenizer) 180 | cache["writer"] = writer 181 | cache["human"] = human 182 | writer.step() 183 | else: 184 | human = cache["human"] 185 | writer = cache["writer"] 186 | output = writer.output 187 | output['output_memory'] = short_memory 188 | output['output_instruction'] = selected_instruction 189 | human.input = output 190 | human.step() 191 | writer.input = human.output 192 | writer.step() 193 | 194 | # short memory, long memory, current written paragraphs, 3 next instructions 195 | return writer.output['output_memory'], parse_instructions(writer.long_memory), current_paras + '\n\n' + writer.output['input_paragraph'], *writer.output['output_instruction'] 196 | 197 | 198 | # SelectData is a subclass of EventData 199 | def on_select(instruction1, instruction2, instruction3, evt: gr.SelectData): 200 | selected_plan = int(evt.value.replace("Instruction ", "") 201 | ) if "en" == lang_opt else int(evt.value.replace("指令 ", "")) 202 | selected_plan = [instruction1, instruction2, instruction3][selected_plan-1] 203 | return selected_plan 204 | 205 | 206 | def reload_model(choice): 207 | pass 208 | 209 | 210 | with gr.Blocks(title="RecurrentGPT", css="footer {visibility: hidden}", theme="default") as demo: 211 | if "en" == lang_opt: 212 | gr.Markdown( 213 | """ 214 | # Recurrent-LLM 215 | Interactive Generation of (Arbitrarily) Long Texts with Human-in-the-Loop 216 | """) 217 | elif lang_opt in ["zh1", "zh2"]: 218 | gr.Markdown( 219 | """ 220 | # Recurrent-LLM 221 | 可以根据题目和简介自动续写文章 222 | 也可以手动选择剧情走向进行续写 223 | """) 224 | 225 | with gr.Tab("Auto-Generation"): 226 | with gr.Row(): 227 | with gr.Column(): 228 | with gr.Box(): 229 | with gr.Row(): 230 | with gr.Column(scale=1, min_width=200): 231 | novel_type = gr.Textbox( 232 | label="Novel Type", placeholder="e.g. 
science fiction") if "en" == lang_opt else gr.Textbox( 233 | label="请输入文本", placeholder="可以自己填写或者从EXamples中选择一个填入") 234 | with gr.Column(scale=2, min_width=400): 235 | description = gr.Textbox( 236 | label="Description") if "en" == lang_opt else gr.Textbox(label="剧情简介(非必选项)") 237 | btn_init = gr.Button( 238 | "Init Novel Generation", variant="primary") if "en" == lang_opt else gr.Button( 239 | "点击开始运行", variant="primary") 240 | if "en" == lang_opt: 241 | gr.Examples(["Science Fiction", "Romance", "Mystery", "Fantasy", 242 | "Historical", "Horror", "Thriller", "Western", "Young Adult", ], inputs=[novel_type]) 243 | elif lang_opt in ["zh1", "zh2"]: 244 | gr.Examples(["科幻故事", "青春伤痛文学", "爱到死去活来", "搞笑", 245 | "幽默", "鬼故事", "喜剧", "童话", "魔法世界", ], inputs=[novel_type]) 246 | else: 247 | raise Exception(f"not supported language: {lang_opt}") 248 | 249 | written_paras = gr.Textbox( 250 | label="Written Paragraphs (editable)", max_lines=21, lines=21) if "en" == lang_opt else gr.Textbox( 251 | label="文章内容", max_lines=21, lines=21) 252 | with gr.Column(): 253 | with gr.Box(): 254 | if "en" == lang_opt: 255 | gr.Markdown("### Memory Module\n") 256 | elif lang_opt in ["zh1", "zh2"]: 257 | gr.Markdown("### 剧情模型\n") 258 | 259 | short_memory = gr.Textbox( 260 | label="Short-Term Memory (editable)", max_lines=3, lines=3) if "en" == lang_opt else gr.Textbox( 261 | label="短期记忆 (可编辑)", max_lines=3, lines=3) 262 | long_memory = gr.Textbox( 263 | label="Long-Term Memory (editable)", max_lines=6, lines=6) if "en" == lang_opt else gr.Textbox( 264 | label="长期记忆 (可编辑)", max_lines=6, lines=6) 265 | # long_memory = gr.Dataframe( 266 | # # label="Long-Term Memory (editable)", 267 | # headers=["Long-Term Memory (editable)"], 268 | # datatype=["str"], 269 | # row_count=3, 270 | # max_rows=3, 271 | # col_count=(1, "fixed"), 272 | # type="array", 273 | # ) 274 | with gr.Box(): 275 | if "en" == lang_opt: 276 | gr.Markdown("### Instruction Module\n") 277 | elif lang_opt in ["zh1", "zh2"]: 278 | gr.Markdown("### 选项模型\n") 279 | 280 | with gr.Row(): 281 | instruction1 = gr.Textbox( 282 | label="Instruction 1 (editable)", max_lines=4, lines=4) if "en" == lang_opt else gr.Textbox( 283 | label="指令1(可编辑)", max_lines=4, lines=4) 284 | instruction2 = gr.Textbox( 285 | label="Instruction 2 (editable)", max_lines=4, lines=4) if "en" == lang_opt else gr.Textbox( 286 | label="指令2(可编辑)", max_lines=4, lines=4) 287 | instruction3 = gr.Textbox( 288 | label="Instruction 3 (editable)", max_lines=4, lines=4) if "en" == lang_opt else gr.Textbox( 289 | label="指令3(可编辑)", max_lines=4, lines=4) 290 | selected_plan = gr.Textbox( 291 | label="Revised Instruction (from last step)", max_lines=2, lines=2) if "en" == lang_opt else gr.Textbox( 292 | label="选项说明 (来自上一步)", max_lines=2, lines=2) 293 | 294 | btn_step = gr.Button("Next Step", variant="primary") if "en" == lang_opt else gr.Button( 295 | "下一步", variant="primary") 296 | 297 | btn_init.click(init, inputs=[novel_type, description], outputs=[ 298 | short_memory, long_memory, written_paras, instruction1, instruction2, instruction3]) 299 | btn_step.click(step, inputs=[short_memory, long_memory, instruction1, instruction2, instruction3, written_paras], outputs=[ 300 | short_memory, long_memory, written_paras, selected_plan, instruction1, instruction2, instruction3]) 301 | 302 | with gr.Tab("Human-in-the-Loop"): 303 | with gr.Row(): 304 | with gr.Column(): 305 | with gr.Box(): 306 | with gr.Row(): 307 | with gr.Column(scale=1, min_width=200): 308 | novel_type = gr.Textbox( 309 | label="Novel Type", 
placeholder="e.g. science fiction") if "en" == lang_opt else gr.Textbox( 310 | label="请输入文本", placeholder="可以自己填写或者从EXamples中选择一个填入") 311 | with gr.Column(scale=2, min_width=400): 312 | description = gr.Textbox( 313 | label="Description") if "en" == lang_opt else gr.Textbox(label="剧情简介(非必选项)") 314 | btn_init = gr.Button( 315 | "Init Novel Generation", variant="primary") if "en" == lang_opt else gr.Button( 316 | "点击开始运行", variant="primary") 317 | 318 | if "en" == lang_opt: 319 | gr.Examples(["Science Fiction", "Romance", "Mystery", "Fantasy", 320 | "Historical", "Horror", "Thriller", "Western", "Young Adult", ], inputs=[novel_type]) 321 | elif lang_opt in ["zh1", "zh2"]: 322 | gr.Examples(["科幻小说", "爱情小说", "推理小说", "奇幻小说", 323 | "玄幻小说", "恐怖", "悬疑", "惊悚", "武侠小说", ], inputs=[novel_type]) 324 | 325 | written_paras = gr.Textbox( 326 | label="Written Paragraphs (editable)", max_lines=23, lines=23) if "en" == lang_opt else gr.Textbox( 327 | label="文章内容 (可编辑)", max_lines=23, lines=23) 328 | with gr.Column(): 329 | with gr.Box(): 330 | if "en" == lang_opt: 331 | gr.Markdown("### Memory Module\n") 332 | elif lang_opt in ["zh1", "zh2"]: 333 | gr.Markdown("### 剧情模型\n") 334 | 335 | short_memory = gr.Textbox( 336 | label="Short-Term Memory (editable)", max_lines=3, lines=3) if "en" == lang_opt else gr.Textbox( 337 | label="短期记忆 (可编辑)", max_lines=3, lines=3) 338 | long_memory = gr.Textbox( 339 | label="Long-Term Memory (editable)", max_lines=6, lines=6) if "en" == lang_opt else gr.Textbox( 340 | label="长期记忆 (可编辑)", max_lines=6, lines=6) 341 | with gr.Box(): 342 | if "en" == lang_opt: 343 | gr.Markdown("### Instruction Module\n") 344 | elif lang_opt in ["zh1", "zh2"]: 345 | gr.Markdown("### 选项模型\n") 346 | 347 | with gr.Row(): 348 | instruction1 = gr.Textbox( 349 | label="Instruction 1", max_lines=3, lines=3, interactive=False) if "en" == lang_opt else gr.Textbox( 350 | label="指令1", max_lines=3, lines=3, interactive=False) 351 | instruction2 = gr.Textbox( 352 | label="Instruction 2", max_lines=3, lines=3, interactive=False) if "en" == lang_opt else gr.Textbox( 353 | label="指令2", max_lines=3, lines=3, interactive=False) 354 | instruction3 = gr.Textbox( 355 | label="Instruction 3", max_lines=3, lines=3, interactive=False) if "en" == lang_opt else gr.Textbox( 356 | label="指令3", max_lines=3, lines=3, interactive=False) 357 | with gr.Row(): 358 | with gr.Column(scale=1, min_width=100): 359 | selected_plan = gr.Radio( 360 | ["Instruction 1", "Instruction 2", "Instruction 3"], label="Instruction Selection",) if "en" == lang_opt else gr.Radio(["指令 1", "指令 2", "指令 3"], label="指令 选择",) 361 | # info="Select the instruction you want to revise and use for the next step generation.") 362 | with gr.Column(scale=3, min_width=300): 363 | selected_instruction = gr.Textbox( 364 | label="Selected Instruction (editable)", max_lines=5, lines=5) if "en" == lang_opt else gr.Textbox( 365 | label="在上一步骤中被选择的 (可编辑)", max_lines=5, lines=5) 366 | 367 | btn_step = gr.Button("Next Step", variant="primary") if "en" == lang_opt else gr.Button( 368 | "下一步", variant="primary") 369 | 370 | btn_init.click(init, inputs=[novel_type, description], outputs=[ 371 | short_memory, long_memory, written_paras, instruction1, instruction2, instruction3]) 372 | btn_step.click(controled_step, inputs=[short_memory, long_memory, selected_instruction, written_paras], outputs=[ 373 | short_memory, long_memory, written_paras, instruction1, instruction2, instruction3]) 374 | selected_plan.select(on_select, inputs=[ 375 | instruction1, instruction2, instruction3], 
outputs=[selected_instruction]) 376 | 377 | with gr.Tab("Model-Config"): 378 | model_opt_radio = gr.Radio(["OpenAI", "ChatGLM-6B", "Vicuna-7B"], value="OpenAI", label="model", 379 | info="select language you preferred. Default is English.", 380 | interactive=True 381 | ) 382 | 383 | reload_button = gr.Button("Reload/重新加载") 384 | reload_button.click(reload_model, show_progress=True, 385 | inputs=[model_opt_radio], 386 | outputs=[novel_type]) 387 | 388 | demo.queue(concurrency_count=1) 389 | 390 | if __name__ == "__main__": 391 | demo.launch(server_port=8005, share=True, 392 | debug=True, 393 | server_name="0.0.0.0", show_api=False) 394 | -------------------------------------------------------------------------------- /human_simulator.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | 6 | from utils import get_content_between_a_b, parse_instructions 7 | from prompts.human_simulator import get_input_text 8 | from global_config import lang_opt, llm_model_opt 9 | 10 | if "openai" == llm_model_opt: 11 | from utils.openai_util import get_api_response 12 | elif "vicuna" == llm_model_opt: 13 | from utils.vicuna_util import get_api_response 14 | elif "chatglm" == llm_model_opt: 15 | from utils.chatglm_util import get_api_response 16 | elif "baichuan" == llm_model_opt: 17 | from utils.baichuan_util import get_api_response 18 | elif "aquila" == llm_model_opt: 19 | from utils.aquila_util import get_api_response 20 | elif "falcon" == llm_model_opt: 21 | from utils.falcon_util import get_api_response 22 | else: 23 | raise Exception("not supported llm model name: {}".format(llm_model_opt)) 24 | 25 | 26 | class Human: 27 | 28 | def __init__(self, input, memory, embedder, model, tokenizer): 29 | self.input = input 30 | if memory: 31 | self.memory = memory 32 | else: 33 | self.memory = self.input['output_memory'] 34 | self.embedder = embedder 35 | self.model = model 36 | self.tokenizer = tokenizer 37 | self.output = {} 38 | 39 | def prepare_input(self): 40 | previous_paragraph = self.input["input_paragraph"] 41 | writer_new_paragraph = self.input["output_paragraph"] 42 | memory = self.input["output_memory"] 43 | user_edited_plan = self.input["output_instruction"] 44 | 45 | input_text = get_input_text( 46 | lang_opt, previous_paragraph, memory, writer_new_paragraph, user_edited_plan) 47 | 48 | return input_text 49 | 50 | def parse_plan(self, response): 51 | plan = get_content_between_a_b('Selected Plan:', 'Reason', response) 52 | return plan 53 | 54 | def select_plan(self, response_file): # TODO ??? 55 | 56 | previous_paragraph = self.input["input_paragraph"] 57 | writer_new_paragraph = self.input["output_paragraph"] 58 | memory = self.input["output_memory"] 59 | previous_plans = self.input["output_instruction"] 60 | prompt = f""" 61 | Now imagine you are a helpful assistant that help a novelist with decision making. You will be given a previously written paragraph and a paragraph written by a ChatGPT writing assistant, a summary of the main storyline maintained by the ChatGPT assistant, and 3 different possible plans of what to write next. 62 | I need you to: 63 | Select the most interesting and suitable plan proposed by the ChatGPT assistant. 
64 | 65 | Previously written paragraph: 66 | {previous_paragraph} 67 | 68 | The summary of the main storyline maintained by your ChatGPT assistant: 69 | {memory} 70 | 71 | The new paragraph written by your ChatGPT assistant: 72 | {writer_new_paragraph} 73 | 74 | Three plans of what to write next proposed by your ChatGPT assistant: 75 | {parse_instructions(previous_plans)} 76 | 77 | Now start choosing, organize your output by strictly following the output format as below: 78 | 79 | Selected Plan: 80 | 81 | 82 | Reason: 83 | 84 | """ 85 | print(prompt+'\n'+'\n') 86 | 87 | response = get_api_response(self.model, self.tokenizer, prompt) 88 | 89 | plan = self.parse_plan(response) 90 | while plan == None: 91 | response = get_api_response(self.model, self.tokenizer, prompt) 92 | plan = self.parse_plan(response) 93 | 94 | if response_file: 95 | with open(response_file, 'a', encoding='utf-8') as f: 96 | f.write(f"Selected plan here:\n{response}\n\n") 97 | 98 | return plan 99 | 100 | def parse_output(self, text): 101 | try: 102 | if text.splitlines()[0].startswith('Extended Paragraph'): 103 | new_paragraph = get_content_between_a_b( 104 | 'Extended Paragraph:', 'Selected Plan', text) 105 | else: 106 | new_paragraph = text.splitlines()[0] 107 | 108 | lines = text.splitlines() 109 | if lines[-1] != '\n' and lines[-1].startswith('Revised Plan:'): 110 | revised_plan = lines[-1][len("Revised Plan:"):] 111 | elif lines[-1] != '\n': 112 | revised_plan = lines[-1] 113 | 114 | output = { 115 | "output_paragraph": new_paragraph, 116 | # "selected_plan": selected_plan, 117 | "output_instruction": revised_plan, 118 | # "memory":self.input["output_memory"] 119 | } 120 | 121 | return output 122 | except: 123 | return None 124 | 125 | def step(self, response_file=None): 126 | 127 | prompt = self.prepare_input() 128 | print(prompt+'\n'+'\n') 129 | 130 | response = get_api_response(self.model, self.tokenizer, prompt) 131 | self.output = self.parse_output(response) 132 | while self.output == None: 133 | response = get_api_response(self.model, self.tokenizer, prompt) 134 | self.output = self.parse_output(response) 135 | if response_file: 136 | with open(response_file, 'a', encoding='utf-8') as f: 137 | f.write(f"Human's output here:\n{response}\n\n") 138 | -------------------------------------------------------------------------------- /imgs/webui-snapshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jackaduma/Recurrent-LLM/1a857f430efd68f5dfbd65f581191e43a1655f1d/imgs/webui-snapshot.png -------------------------------------------------------------------------------- /init_prompt.json: -------------------------------------------------------------------------------- 1 | {"init_prompt": "\nPlease write a {type} novel about {topic} with about 50 chapters. Follow the format below precisely:\n\n Begin with the name of the novel.\n Next, write an outline for the first chapter. The outline should describe the background and the beginning of the novel.\n Write the first three paragraphs with their indication of the novel based on your outline. Write in a novelistic style and take your time to set the scene.\n Write a summary that captures the key information of the three paragraphs.\n Finally, write three different instructions for what to write next, each containing around five sentences. 
Each instruction should present a possible, interesting continuation of the story.\n The output format should follow these guidelines:\n Name: \n Outline: \n Paragraph 1: \n Paragraph 2: \n Paragraph 3: \n Summary: \n Instruction 1: \n Instruction 2: \n Instruction 3: \n \n Make sure to be precise and follow the output format strictly.\n \n "} -------------------------------------------------------------------------------- /misc/ali_pay.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jackaduma/Recurrent-LLM/1a857f430efd68f5dfbd65f581191e43a1655f1d/misc/ali_pay.png -------------------------------------------------------------------------------- /misc/placeholder: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /misc/wechat_pay.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jackaduma/Recurrent-LLM/1a857f430efd68f5dfbd65f581191e43a1655f1d/misc/wechat_pay.png -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | -------------------------------------------------------------------------------- /models/aquila_fa.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | import os 6 | import torch 7 | from flagai.auto_model.auto_loader import AutoLoader 8 | from flagai.model.predictor.predictor import Predictor 9 | from flagai.model.predictor.aquila import aquila_generate 10 | from flagai.data.tokenizer import Tokenizer 11 | import bminf 12 | 13 | 14 | 15 | max_token: int = 128 # 10000 # 64 16 | temperature: float = 0.75 17 | top_p = 0.9 18 | 19 | state_dict = "./checkpoints_in" 20 | model_name = 'aquilachat-7b' 21 | 22 | def load_model(): 23 | loader = AutoLoader( 24 | "lm", 25 | model_dir=state_dict, 26 | model_name=model_name, 27 | use_cache=True, 28 | fp16=True) 29 | model = loader.get_model() 30 | tokenizer = loader.get_tokenizer() 31 | cache_dir = os.path.join(state_dict, model_name) 32 | 33 | model.eval() 34 | 35 | with torch.cuda.device(0): 36 | model = bminf.wrapper(model, quantization=False, memory_limit=2 << 30) 37 | 38 | return tokenizer, model -------------------------------------------------------------------------------- /models/aquila_hf.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | 6 | import torch 7 | from transformers import AutoTokenizer, AutoModelForCausalLM 8 | 9 | # trust_remote_code: remote code depends old version transformers 10 | """ 11 | File "/root/.cache/huggingface/modules/transformers_modules/qhduan/aquilachat-7b/9d8fcc4f12b6bb6ea0c8a494ba85110f78804739/modeling_aquila.py", line 33, in 12 | from transformers.models.llama.configuration_llama import LlamaConfig 13 | ModuleNotFoundError: No module named 'transformers.models.llama' 14 | """ 15 | def load_model(): 16 | tokenizer = AutoTokenizer.from_pretrained('qhduan/aquilachat-7b') 17 | model = AutoModelForCausalLM.from_pretrained('qhduan/aquilachat-7b', trust_remote_code=True) 18 | model = model.eval().half().cuda() 19 | 20 | return tokenizer, model 
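# Usage sketch: one way the (tokenizer, model) pair returned by load_model()
# could be driven for a single generation step. The prompt string (borrowed from
# the commented example in models/baichuan_hf.py) and the sampling parameters
# (max_new_tokens, do_sample, top_p) are illustrative assumptions.
if __name__ == "__main__":
    tokenizer, model = load_model()
    inputs = tokenizer("登鹳雀楼->王之涣\n夜雨寄北->", return_tensors="pt").to(model.device)
    with torch.no_grad():
        output_ids = model.generate(**inputs, max_new_tokens=64, do_sample=True, top_p=0.9)
    print(tokenizer.decode(output_ids[0], skip_special_tokens=True))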
-------------------------------------------------------------------------------- /models/baichuan_hf.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | from transformers import AutoModelForCausalLM, AutoTokenizer 6 | from peft import PeftModel 7 | 8 | max_token: int = 10000 # 10000 # 64 9 | temperature: float = 0.75 10 | top_p = 0.9 11 | use_lora = False 12 | 13 | 14 | # def load_model(): 15 | # model_name_or_path = "baichuan-inc/baichuan-7B" 16 | # # model_name_or_path = "~/.cache/huggingface/hub/models--baichuan-inc--baichuan-7B/snapshots/39916f64eb892ccdc1982b0eef845b3b8fd43f6b/" 17 | # tokenizer = AutoTokenizer.from_pretrained( 18 | # model_name_or_path, 19 | # trust_remote_code=True) 20 | # model = AutoModelForCausalLM.from_pretrained( 21 | # model_name_or_path, 22 | # device_map="auto", 23 | # trust_remote_code=True) 24 | 25 | # # inputs = tokenizer('登鹳雀楼->王之涣\n夜雨寄北->', return_tensors='pt') 26 | # # inputs = inputs.to('cuda:0') 27 | # # pred = model.generate(**inputs, max_new_tokens=64,repetition_penalty=1.1) 28 | # # print(tokenizer.decode(pred.cpu()[0], skip_special_tokens=True)) 29 | 30 | # return tokenizer, model 31 | 32 | 33 | def load_model(use_lora=True, LOAD_IN_8BIT=False): 34 | """ 35 | params: 36 | use_lora=True, LOAD_IN_8BIT=False 37 | use_lora=False. LOAD_IN_8BIT=True 38 | """ 39 | tokenizer = AutoTokenizer.from_pretrained("baichuan-inc/baichuan-7B", 40 | trust_remote_code=True) 41 | model = AutoModelForCausalLM.from_pretrained("baichuan-inc/baichuan-7B", 42 | device_map="auto", 43 | trust_remote_code=True, 44 | load_in_8bit=LOAD_IN_8BIT, # if not have enough GPU memory, then use 8bit 45 | ) 46 | 47 | if use_lora: 48 | model = PeftModel.from_pretrained(model, "hiyouga/baichuan-7b-sft") 49 | 50 | return tokenizer, model 51 | -------------------------------------------------------------------------------- /models/chatglm_hf.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | import torch 6 | from transformers import AutoTokenizer, AutoConfig, AutoModel 7 | 8 | model_name_or_path = "THUDM/chatglm-6b-int8" 9 | max_token: int = 10000 10 | temperature: float = 0.75 11 | top_p = 0.9 12 | use_lora = False 13 | 14 | def auto_configure_device_map(num_gpus: int, use_lora: bool): 15 | # transformer.word_embeddings 占用1层 16 | # transformer.final_layernorm 和 lm_head 占用1层 17 | # transformer.layers 占用 28 层 18 | # 总共30层分配到num_gpus张卡上 19 | num_trans_layers = 28 20 | per_gpu_layers = 30 / num_gpus 21 | 22 | # bugfix: PEFT加载lora模型出现的层命名不同 23 | # if LLM_LORA_PATH and use_lora: 24 | # layer_prefix = 'base_model.model.transformer' 25 | # else: 26 | layer_prefix = 'transformer' 27 | 28 | # bugfix: 在linux中调用torch.embedding传入的weight,input不在同一device上,导致RuntimeError 29 | # windows下 model.device 会被设置成 transformer.word_embeddings.device 30 | # linux下 model.device 会被设置成 lm_head.device 31 | # 在调用chat或者stream_chat时,input_ids会被放到model.device上 32 | # 如果transformer.word_embeddings.device和model.device不同,则会导致RuntimeError 33 | # 因此这里将transformer.word_embeddings,transformer.final_layernorm,lm_head都放到第一张卡上 34 | device_map = {f'{layer_prefix}.word_embeddings': 0, 35 | f'{layer_prefix}.final_layernorm': 0, 'lm_head': 0, 36 | f'base_model.model.lm_head': 0, } 37 | 38 | used = 2 39 | gpu_target = 0 40 | for i in range(num_trans_layers): 41 | if used >= per_gpu_layers: 42 | gpu_target += 1 43 | used = 0 44 | assert 
gpu_target < num_gpus 45 | device_map[f'{layer_prefix}.layers.{i}'] = gpu_target 46 | used += 1 47 | 48 | return device_map 49 | 50 | def load_model(llm_device="cuda", device_map=None): 51 | tokenizer = AutoTokenizer.from_pretrained(model_name_or_path,trust_remote_code=True) 52 | model_config = AutoConfig.from_pretrained(model_name_or_path, trust_remote_code=True) 53 | model = AutoModel.from_pretrained(model_name_or_path, config=model_config, trust_remote_code=True) 54 | 55 | if torch.cuda.is_available() and llm_device.lower().startswith("cuda"): 56 | # 根据当前设备GPU数量决定是否进行多卡部署 57 | num_gpus = torch.cuda.device_count() 58 | if num_gpus < 2 and device_map is None: 59 | model = model.half().cuda() 60 | else: 61 | from accelerate import dispatch_model 62 | 63 | # model = AutoModel.from_pretrained(model_name_or_path, trust_remote_code=True, 64 | # config=model_config, **kwargs) 65 | # 可传入device_map自定义每张卡的部署情况 66 | if device_map is None: 67 | device_map = auto_configure_device_map(num_gpus, use_lora) 68 | 69 | model = dispatch_model( 70 | model.half(), device_map=device_map) 71 | else: 72 | model = model.float().to(llm_device) 73 | 74 | model = model.eval() 75 | 76 | return tokenizer, model -------------------------------------------------------------------------------- /models/falcon_hf.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | 6 | import torch 7 | from transformers import AutoTokenizer, AutoModelForCausalLM 8 | from auto_gptq import AutoGPTQForCausalLM, BaseQuantizeConfig 9 | 10 | max_token: int = 10000 # 10000 # 64 11 | temperature: float = 0.75 12 | top_p = 0.9 13 | use_lora = False 14 | 15 | # model_name_or_path = "Hannes-Epoch/falcon-7b-instruct-8bit" # not work, miss file 16 | 17 | 18 | def load_model(opt="gptq"): 19 | if "pt" == opt: 20 | return load_pt_model() 21 | elif "gptq" == opt: 22 | return load_gptq_model() 23 | else: 24 | raise Exception("not supported opt: {}".format(opt)) 25 | 26 | ######################################################################################################## 27 | 28 | def load_gptq_model(): 29 | model_name_or_path = "TheBloke/falcon-7b-instruct-GPTQ" 30 | # You could also download the model locally, and access it there 31 | # model_name_or_path = "/path/to/TheBloke_falcon-7b-instruct-GPTQ" 32 | 33 | model_basename = "gptq_model-4bit-64g" 34 | 35 | use_triton = False 36 | 37 | tokenizer = AutoTokenizer.from_pretrained( 38 | model_name_or_path, use_fast=True) 39 | 40 | model = AutoGPTQForCausalLM.from_quantized(model_name_or_path, 41 | model_basename=model_basename, 42 | use_safetensors=True, 43 | trust_remote_code=True, 44 | device="cuda:0", 45 | use_triton=use_triton, 46 | quantize_config=None) 47 | 48 | return tokenizer, model 49 | 50 | 51 | ######################################################################################################## 52 | 53 | def load_pt_model(): 54 | model_name_or_path = "tiiuae/falcon-7b" 55 | # model_name_or_path = "tiiuae/falcon-7b-instruct" 56 | 57 | tokenizer = AutoTokenizer.from_pretrained( 58 | model_name_or_path, 59 | trust_remote_code=True, 60 | ) 61 | model = AutoModelForCausalLM.from_pretrained( 62 | model_name_or_path, 63 | trust_remote_code=True, 64 | device_map='auto', 65 | # load_in_8bit=True, # not working "RWForCausalLM.__init__() got an unexpected keyword argument 'load_in_8bit'" 66 | ) 67 | 68 | return tokenizer, model 69 | 70 | 
######################################################################################################## -------------------------------------------------------------------------------- /models/vicuna_bin.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | 6 | 7 | from llama_cpp import Llama, LlamaCache 8 | from common import torch_gc 9 | 10 | 11 | max_token: int = 10000 12 | temperature: float = 0.75 13 | top_p = 0.9 14 | 15 | def load_model(): 16 | model_name_or_path = "/root/下载/ggml-vic13b-q5_1.bin" 17 | 18 | params = { 19 | 'model_path': str(model_name_or_path), 20 | 'n_ctx': 2048, 21 | 'seed': 0, 22 | 'n_threads': 8, 23 | 'n_gpu_layers': 40, 24 | 'n_batch': 512, 25 | 'verbose': True, 26 | } 27 | model = Llama(**params) 28 | model.set_cache(LlamaCache) 29 | 30 | tokenizer = model.tokenizer() 31 | 32 | return tokenizer, model -------------------------------------------------------------------------------- /prompts/__init__.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | -------------------------------------------------------------------------------- /prompts/chatgpt_query.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | def get_input_text(lang_opt, short_memory, input_paragraph, input_instruction, input_long_term_memory, new_character_prompt): 6 | if "en" == lang_opt: 7 | input_text = f"""I need you to help me write a novel. Now I give you a memory (a brief summary) of 400 words, you should use it to store the key content of what has been written so that you can keep track of very long context. For each time, I will give you your current memory (a brief summary of previous stories. You should use it to store the key content of what has been written so that you can keep track of very long context), the previously written paragraph, and instructions on what to write in the next paragraph. 8 | I need you to write: 9 | 1. Output Paragraph: the next paragraph of the novel. The output paragraph should contain around 20 sentences and should follow the input instructions. 10 | 2. Output Memory: The updated memory. You should first explain which sentences in the input memory are no longer necessary and why, and then explain what needs to be added into the memory and why. After that you should write the updated memory. The updated memory should be similar to the input memory except the parts you previously thought that should be deleted or added. The updated memory should only store key information. The updated memory should never exceed 20 sentences! 11 | 3. Output Instruction: instructions of what to write next (after what you have written). You should output 3 different instructions, each is a possible interesting continuation of the story. Each output instruction should contain around 5 sentences 12 | Here are the inputs: 13 | 14 | Input Memory: 15 | {short_memory} 16 | 17 | Input Paragraph: 18 | {input_paragraph} 19 | 20 | Input Instruction: 21 | {input_instruction} 22 | 23 | Input Related Paragraphs: 24 | {input_long_term_memory} 25 | 26 | Now start writing, organize your output by strictly following the output format as below: 27 | Output Paragraph: 28 | , around 20 sentences. 
29 | 30 | Output Memory: 31 | Rational: ; 32 | Updated Memory: , around 10 to 20 sentences 33 | 34 | Output Instruction: 35 | Instruction 1: , around 5 sentences 36 | Instruction 2: , around 5 sentences 37 | Instruction 3: , around 5 sentences 38 | 39 | Very important!! The updated memory should only store key information. The updated memory should never contain over 500 words! 40 | Finally, remember that you are writing a novel. Write like a novelist and do not move too fast when writing the output instructions for the next paragraph. Remember that the chapter will contain over 10 paragraphs and the novel will contain over 100 chapters. And this is just the beginning. Just write some interesting staffs that will happen next. Also, think about what plot can be attractive for common readers when writing output instructions. 41 | 42 | Very Important: 43 | You should first explain which sentences in the input memory are no longer necessary and why, and then explain what needs to be added into the memory and why. After that, you start rewrite the input memory to get the updated memory. 44 | {new_character_prompt} 45 | """ 46 | 47 | elif "zh1" == lang_opt: 48 | input_text = f"""I need you to help me write a novel. Now I give you a memory (a brief summary) of 400 words, you should use it to store the key content of what has been written so that you can keep track of very long context. For each time, I will give you your current memory (a brief summary of previous stories. You should use it to store the key content of what has been written so that you can keep track of very long context), the previously written paragraph, and instructions on what to write in the next paragraph. 49 | I need you to write: 50 | 1. Output Paragraph: the next paragraph of the novel. The output paragraph should contain around 20 sentences and should follow the input instructions. 51 | 2. Output Memory: The updated memory. You should first explain which sentences in the input memory are no longer necessary and why, and then explain what needs to be added into the memory and why. After that you should write the updated memory. The updated memory should be similar to the input memory except the parts you previously thought that should be deleted or added. The updated memory should only store key information. The updated memory should never exceed 20 sentences! 52 | 3. Output Instruction: instructions of what to write next (after what you have written). You should output 3 different instructions, each is a possible interesting continuation of the story. Each output instruction should contain around 5 sentences 53 | 4. 非常重要!请将输出信息内容全部转化为中文,注意要符合中文母语的语法和用词习惯。 54 | Here are the inputs: 55 | 56 | Input Memory: 57 | {short_memory} 58 | 59 | Input Paragraph: 60 | {input_paragraph} 61 | 62 | Input Instruction: 63 | {input_instruction} 64 | 65 | Input Related Paragraphs: 66 | {input_long_term_memory} 67 | 68 | Now start writing, organize your output by strictly following the output format as below: 69 | Output Paragraph: 70 | , around 20 sentences. 71 | 72 | Output Memory: 73 | Rational: ; 74 | Updated Memory: , around 10 to 20 sentences 75 | 76 | Output Instruction: 77 | Instruction 1: , around 5 sentences 78 | Instruction 2: , around 5 sentences 79 | Instruction 3: , around 5 sentences 80 | 81 | Very important!! The updated memory should only store key information. The updated memory should never contain over 500 words! 82 | Finally, remember that you are writing a novel. 
Write like a novelist and do not move too fast when writing the output instructions for the next paragraph. Remember that the chapter will contain over 10 paragraphs and the novel will contain over 100 chapters. And this is just the beginning. Just write some interesting staffs that will happen next. Also, think about what plot can be attractive for common readers when writing output instructions. 83 | 84 | Very Important: 85 | You should first explain which sentences in the input memory are no longer necessary and why, and then explain what needs to be added into the memory and why. After that, you start rewrite the input memory to get the updated memory. 86 | 非常重要!请将输出信息内容全部转化为中文,注意要符合中文母语的语法和用词习惯。 87 | {new_character_prompt} 88 | """ 89 | 90 | elif "zh2" == lang_opt: 91 | input_text = f"""我需要你帮我写一部小说。现在我给你一个400字的记忆(一个简短的总结),你应该用它来存储已经写好的关键内容,这样你就可以记录很长的上下文。每一次,我都会给你当前的记忆(以前的故事的简要总结。你应该用它来存储所写内容的关键内容,这样你就能记下很长的上下文),之前写的段落,以及下一段要写的内容的指示。 92 | 我需要你来写: 93 | 1. 输出段落:小说的下一个段落。输出段应包含约20句话,并应遵循输入指示。 94 | 2. 输出记忆: 更新后的记忆。你应该首先解释输入记忆中的哪些句子不再需要,为什么,然后解释需要添加到记忆中的内容,为什么。之后,你应该写出更新的记忆。除了你之前认为应该删除或添加的部分,更新后的记忆应该与输入的记忆相似。更新后的记忆应该只存储关键信息。更新后的记忆不应该超过20个句子! 95 | 3. 输出指令:接下来要写什么的指令(在你写完之后)。你应该输出3个不同的指令,每个指令都是故事的一个可能的有趣的延续。每个输出指令应该包含大约5个句子 96 | 下面是输入的内容: 97 | 98 | 输入内存: 99 | {short_memory} 100 | 101 | 输入段落: 102 | {input_paragraph} 103 | 104 | 输入指令: 105 | {input_instruction}。 106 | 107 | 输入相关段落: 108 | {input_long_term_memory} 109 | 110 | 现在开始写,严格按照下面的输出格式来组织你的输出: 111 | 输出段落: 112 | <输出段落的字符串>,大约20句话。 113 | 114 | 输出记忆: 115 | 理性: <解释如何更新内存的字符串>; 116 | 更新的记忆: <更新内存的字符串>,大约10到20句话 117 | 118 | 输出指令: 119 | 指令1:<指令1的内容>,大约5句话 120 | 指令2:<指令2的内容>,大约5句话 121 | 指令3:<指令3的内容>,大约5句话 122 | 123 | 非常重要!! 更新的内存应该只存储关键信息。更新后的记忆不应该包含超过500个字!!!! 124 | 最后,记住你在写一本小说。像小说家一样写作,在写下一段的输出指令时不要走得太快。记住,这一章将包含10多段,而小说将包含100多章。而这仅仅是个开始。就要写一些接下来会发生的有趣的职员。另外,在写输出说明时,要考虑什么情节能吸引普通读者。 125 | 126 | 非常重要: 127 | 你应该首先解释输入存储器中的哪些句子不再需要,为什么,然后解释需要添加到存储器中的内容,为什么。之后,你开始重写输入内存,得到更新的内存。 128 | {new_character_prompt} 129 | """ 130 | 131 | else: 132 | raise Exception("not supported lang_opt: {}".format(lang_opt)) 133 | 134 | return input_text -------------------------------------------------------------------------------- /prompts/human_simulator.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | 6 | def get_input_text(lang_opt, previous_paragraph, memory, writer_new_paragraph, user_edited_plan): 7 | if "en" == lang_opt: 8 | input_text = f""" 9 | Now imagine you are a novelist writing a Chinese novel with the help of ChatGPT. You will be given a previously written paragraph (wrote by you), and a paragraph written by your ChatGPT assistant, a summary of the main storyline maintained by your ChatGPT assistant, and a plan of what to write next proposed by your ChatGPT assistant. 10 | I need you to write: 11 | 1. Extended Paragraph: Extend the new paragraph written by the ChatGPT assistant to twice the length of the paragraph written by your ChatGPT assistant. 12 | 2. Selected Plan: Copy the plan proposed by your ChatGPT assistant. 13 | 3. Revised Plan: Revise the selected plan into an outline of the next paragraph. 
14 | 15 | Previously written paragraph: 16 | {previous_paragraph} 17 | 18 | The summary of the main storyline maintained by your ChatGPT assistant: 19 | {memory} 20 | 21 | The new paragraph written by your ChatGPT assistant: 22 | {writer_new_paragraph} 23 | 24 | The plan of what to write next proposed by your ChatGPT assistant: 25 | {user_edited_plan} 26 | 27 | Now start writing, organize your output by strictly following the output format as below,所有输出仍然保持是中文: 28 | 29 | Extended Paragraph: 30 | , around 40-50 sentences. 31 | 32 | Selected Plan: 33 | 34 | 35 | Revised Plan: 36 | , keep it short, around 5-7 sentences. 37 | 38 | Very Important: 39 | Remember that you are writing a novel. Write like a novelist and do not move too fast when writing the plan for the next paragraph. Think about how the plan can be attractive for common readers when selecting and extending the plan. Remember to follow the length constraints! Remember that the chapter will contain over 10 paragraphs and the novel will contain over 100 chapters. And the next paragraph will be the second paragraph of the second chapter. You need to leave space for future stories. 40 | 41 | """ 42 | 43 | elif "zh1" == lang_opt: 44 | input_text = f""" 45 | Now imagine you are a novelist writing a Chinese novel with the help of ChatGPT. You will be given a previously written paragraph (wrote by you), and a paragraph written by your ChatGPT assistant, a summary of the main storyline maintained by your ChatGPT assistant, and a plan of what to write next proposed by your ChatGPT assistant. 46 | I need you to write: 47 | 1. Extended Paragraph: Extend the new paragraph written by the ChatGPT assistant to twice the length of the paragraph written by your ChatGPT assistant. 48 | 2. Selected Plan: Copy the plan proposed by your ChatGPT assistant. 49 | 3. Revised Plan: Revise the selected plan into an outline of the next paragraph. 50 | 4. 非常重要!请将输出信息内容全部转化为中文,注意要符合中文母语的语法和用词习惯。 51 | 52 | Previously written paragraph: 53 | {previous_paragraph} 54 | 55 | The summary of the main storyline maintained by your ChatGPT assistant: 56 | {memory} 57 | 58 | The new paragraph written by your ChatGPT assistant: 59 | {writer_new_paragraph} 60 | 61 | The plan of what to write next proposed by your ChatGPT assistant: 62 | {user_edited_plan} 63 | 64 | Now start writing, organize your output by strictly following the output format as below,所有输出仍然保持是中文: 65 | 66 | Extended Paragraph: 67 | , around 40-50 sentences. 68 | 69 | Selected Plan: 70 | 71 | 72 | Revised Plan: 73 | , keep it short, around 5-7 sentences. 74 | 75 | Very Important: 76 | Remember that you are writing a novel. Write like a novelist and do not move too fast when writing the plan for the next paragraph. Think about how the plan can be attractive for common readers when selecting and extending the plan. Remember to follow the length constraints! Remember that the chapter will contain over 10 paragraphs and the novel will contain over 100 chapters. And the next paragraph will be the second paragraph of the second chapter. You need to leave space for future stories. 77 | 非常重要!请将输出信息内容全部转化为中文,注意要符合中文母语的语法和用词习惯。 78 | 79 | """ 80 | 81 | elif "zh2" == lang_opt: 82 | input_text = f""" 83 | 现在想象一下,你是一个小说家,在ChatGPT的帮助下写一本中文小说。你会得到一个先前写好的段落(由你写),和一个由你的ChatGPT助手写的段落,一个由你的ChatGPT助手保持的主要故事情节的总结,以及一个由你的ChatGPT助手提出的下一步写作计划。 84 | 我需要你写: 85 | 1. 扩展段落: 将ChatGPT助手写的新段落延长到你的ChatGPT助手所写段落的两倍。 86 | 2. 选定计划: 复制您的ChatGPT助手提出的计划。 87 | 3. 
修订的计划: 将选定的计划修改为下一段的纲要。 88 | 89 | 以前写的段落: 90 | {previous_paragraph} 91 | 92 | 由你的ChatGPT助手维护的主要故事情节的摘要: 93 | {memory} 94 | 95 | 您的ChatGPT助理写的新段落: 96 | {writer_new_paragraph} 97 | 98 | 您的ChatGPT助理提出的下一步写作计划: 99 | {user_edited_plan} 100 | 101 | 现在开始写,严格按照下面的输出格式来组织你的输出,所有输出仍然保持是中文: 102 | 103 | 扩展段落: 104 | <输出段落的字符串>,大约40-50个句子。 105 | 106 | 选定的计划: 107 | <在此复制计划> 108 | 109 | 修改后的计划: 110 | <修改后的计划字符串>,保持简短,大约5-7句话。 111 | 112 | 非常重要: 113 | 记住你在写一本小说。像小说家一样写作,在写下一段的计划时不要走得太快。在选择和扩展计划时,要考虑计划如何对普通读者具有吸引力。记住要遵循长度限制! 记住,这一章将包含10多段,而小说将包含100多章。而下一段将是第二章的第二段。你需要为未来的故事留出空间。 114 | 115 | """ 116 | 117 | else: 118 | raise Exception("not supported lang_opt: {}".format(lang_opt)) 119 | 120 | return input_text 121 | -------------------------------------------------------------------------------- /prompts/llm_query.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | 6 | def get_input_text(lang_opt, short_memory, input_paragraph, input_instruction, input_long_term_memory, new_character_prompt): 7 | if "en" == lang_opt: 8 | input_text = f"""I need you to help me write a novel. Now I give you a memory (a brief summary) of 400 words, you should use it to store the key content of what has been written so that you can keep track of very long context. For each time, I will give you your current memory (a brief summary of previous stories. You should use it to store the key content of what has been written so that you can keep track of very long context), the previously written paragraph, and instructions on what to write in the next paragraph. 9 | I need you to write: 10 | 1. Output Paragraph: the next paragraph of the novel. The output paragraph should contain around 20 sentences and should follow the input instructions. 11 | 2. Output Memory: The updated memory. You should first explain which sentences in the input memory are no longer necessary and why, and then explain what needs to be added into the memory and why. After that you should write the updated memory. The updated memory should be similar to the input memory except the parts you previously thought that should be deleted or added. The updated memory should only store key information. The updated memory should never exceed 20 sentences! 12 | 3. Output Instruction: instructions of what to write next (after what you have written). You should output 3 different instructions, each is a possible interesting continuation of the story. Each output instruction should contain around 5 sentences 13 | Here are the inputs: 14 | 15 | Input Memory: 16 | {short_memory} 17 | 18 | Input Paragraph: 19 | {input_paragraph} 20 | 21 | Input Instruction: 22 | {input_instruction} 23 | 24 | Input Related Paragraphs: 25 | {input_long_term_memory} 26 | 27 | Now start writing, organize your output by strictly following the output format as below: 28 | Output Paragraph: 29 | , around 20 sentences. 30 | 31 | Output Memory: 32 | Rational: ; 33 | Updated Memory: , around 10 to 20 sentences 34 | 35 | Output Instruction: 36 | Instruction 1: , around 5 sentences 37 | Instruction 2: , around 5 sentences 38 | Instruction 3: , around 5 sentences 39 | 40 | Very important!! The updated memory should only store key information. The updated memory should never contain over 500 words! 41 | Finally, remember that you are writing a novel. Write like a novelist and do not move too fast when writing the output instructions for the next paragraph. 
Remember that the chapter will contain over 10 paragraphs and the novel will contain over 100 chapters. And this is just the beginning. Just write some interesting staffs that will happen next. Also, think about what plot can be attractive for common readers when writing output instructions. 42 | 43 | Very Important: 44 | You should first explain which sentences in the input memory are no longer necessary and why, and then explain what needs to be added into the memory and why. After that, you start rewrite the input memory to get the updated memory. 45 | {new_character_prompt} 46 | """ 47 | 48 | elif "zh1" == lang_opt: 49 | input_text = f"""I need you to help me write a novel. Now I give you a memory (a brief summary) of 400 words, you should use it to store the key content of what has been written so that you can keep track of very long context. For each time, I will give you your current memory (a brief summary of previous stories. You should use it to store the key content of what has been written so that you can keep track of very long context), the previously written paragraph, and instructions on what to write in the next paragraph. 50 | I need you to write: 51 | 1. Output Paragraph: the next paragraph of the novel. The output paragraph should contain around 20 sentences and should follow the input instructions. 52 | 2. Output Memory: The updated memory. You should first explain which sentences in the input memory are no longer necessary and why, and then explain what needs to be added into the memory and why. After that you should write the updated memory. The updated memory should be similar to the input memory except the parts you previously thought that should be deleted or added. The updated memory should only store key information. The updated memory should never exceed 20 sentences! 53 | 3. Output Instruction: instructions of what to write next (after what you have written). You should output 3 different instructions, each is a possible interesting continuation of the story. Each output instruction should contain around 5 sentences 54 | 4. 非常重要!请将输出信息内容全部转化为中文,注意要符合中文母语的语法和用词习惯。 55 | Here are the inputs: 56 | 57 | Input Memory: 58 | {short_memory} 59 | 60 | Input Paragraph: 61 | {input_paragraph} 62 | 63 | Input Instruction: 64 | {input_instruction} 65 | 66 | Input Related Paragraphs: 67 | {input_long_term_memory} 68 | 69 | Now start writing, organize your output by strictly following the output format as below: 70 | Output Paragraph: 71 | , around 20 sentences. 72 | 73 | Output Memory: 74 | Rational: ; 75 | Updated Memory: , around 10 to 20 sentences 76 | 77 | Output Instruction: 78 | Instruction 1: , around 5 sentences 79 | Instruction 2: , around 5 sentences 80 | Instruction 3: , around 5 sentences 81 | 82 | Very important!! The updated memory should only store key information. The updated memory should never contain over 500 words! 83 | Finally, remember that you are writing a novel. Write like a novelist and do not move too fast when writing the output instructions for the next paragraph. Remember that the chapter will contain over 10 paragraphs and the novel will contain over 100 chapters. And this is just the beginning. Just write some interesting staffs that will happen next. Also, think about what plot can be attractive for common readers when writing output instructions. 84 | 85 | Very Important: 86 | You should first explain which sentences in the input memory are no longer necessary and why, and then explain what needs to be added into the memory and why. 
After that, you start rewrite the input memory to get the updated memory. 87 | 非常重要!请将输出信息内容全部转化为中文,注意要符合中文母语的语法和用词习惯。 88 | {new_character_prompt} 89 | """ 90 | 91 | elif "zh2" == lang_opt: 92 | input_text = f"""我需要你帮我写一部小说。现在我给你一个400字的记忆(一个简短的总结),你应该用它来存储已经写好的关键内容,这样你就可以记录很长的上下文。每一次,我都会给你当前的记忆(以前的故事的简要总结。你应该用它来存储所写内容的关键内容,这样你就能记下很长的上下文),之前写的段落,以及下一段要写的内容的指示。 93 | 我需要你来写: 94 | 1. 输出段落:小说的下一个段落。输出段应包含约20句话,并应遵循输入指示。 95 | 2. 输出记忆: 更新后的记忆。你应该首先解释输入记忆中的哪些句子不再需要,为什么,然后解释需要添加到记忆中的内容,为什么。之后,你应该写出更新的记忆。除了你之前认为应该删除或添加的部分,更新后的记忆应该与输入的记忆相似。更新后的记忆应该只存储关键信息。更新后的记忆不应该超过20个句子! 96 | 3. 输出指令:接下来要写什么的指令(在你写完之后)。你应该输出3个不同的指令,每个指令都是故事的一个可能的有趣的延续。每个输出指令应该包含大约5个句子 97 | 下面是输入的内容: 98 | 99 | 输入内存: 100 | {short_memory} 101 | 102 | 输入段落: 103 | {input_paragraph} 104 | 105 | 输入指令: 106 | {input_instruction}。 107 | 108 | 输入相关段落: 109 | {input_long_term_memory} 110 | 111 | 现在开始写,严格按照下面的输出格式来组织你的输出: 112 | 输出段落: 113 | <输出段落的字符串>,大约20句话。 114 | 115 | 输出记忆: 116 | 理性: <解释如何更新内存的字符串>; 117 | 更新的记忆: <更新内存的字符串>,大约10到20句话 118 | 119 | 输出指令: 120 | 指令1:<指令1的内容>,大约5句话 121 | 指令2:<指令2的内容>,大约5句话 122 | 指令3:<指令3的内容>,大约5句话 123 | 124 | 非常重要!! 更新的内存应该只存储关键信息。更新后的记忆不应该包含超过500个字!!!! 125 | 最后,记住你在写一本小说。像小说家一样写作,在写下一段的输出指令时不要走得太快。记住,这一章将包含10多段,而小说将包含100多章。而这仅仅是个开始。就要写一些接下来会发生的有趣的职员。另外,在写输出说明时,要考虑什么情节能吸引普通读者。 126 | 127 | 非常重要: 128 | 你应该首先解释输入存储器中的哪些句子不再需要,为什么,然后解释需要添加到存储器中的内容,为什么。之后,你开始重写输入内存,得到更新的内存。 129 | {new_character_prompt} 130 | """ 131 | 132 | else: 133 | raise Exception("not supported lang_opt: {}".format(lang_opt)) 134 | 135 | return input_text -------------------------------------------------------------------------------- /prompts/service_init.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | 6 | def get_init_prompt(lang_opt, novel_type, description): 7 | if "en" == lang_opt: 8 | return f""" 9 | Please write a {novel_type} novel{description} with 50 chapters. Follow the format below precisely: 10 | 11 | Begin with the name of the novel. 12 | Next, write an outline for the first chapter. The outline should describe the background and the beginning of the novel. 13 | Write the first three paragraphs with their indication of the novel based on your outline. Write in a novelistic style and take your time to set the scene. 14 | Write a summary that captures the key information of the three paragraphs. 15 | Finally, write three different instructions for what to write next, each containing around five sentences. Each instruction should present a possible, interesting continuation of the story. 16 | The output format should follow these guidelines: 17 | Name: 18 | Outline: 19 | Paragraph 1: 20 | Paragraph 2: 21 | Paragraph 3: 22 | Summary: 23 | Instruction 1: 24 | Instruction 2: 25 | Instruction 3: 26 | 27 | Make sure to be precise and follow the output format strictly. 28 | 29 | """ 30 | elif "zh1" == lang_opt: 31 | return f""" 32 | Please write a {novel_type} novel{description} with 50 chapters. Follow the format below precisely: 33 | 34 | Begin with the name of the novel. 35 | Next, write an outline for the first chapter. The outline should describe the background and the beginning of the novel. 36 | Write the first three paragraphs with their indication of the novel based on your outline. Write in a novelistic style and take your time to set the scene. 37 | Write a summary that captures the key information of the three paragraphs. 
38 | Finally, write three different instructions for what to write next, each containing around five sentences. Each instruction should present a possible, interesting continuation of the story. 39 | The output format should follow these guidelines: 40 | 名称: 41 | 概述: 42 | 段落1: 43 | 段落2: 44 | 段落3: 45 | 总结: 46 | 指令1: 47 | 指令2: 48 | 指令3: 49 | 50 | Make sure to be precise and follow the output format strictly. 51 | 非常重要!请将输出信息内容全部转化为中文,注意要符合中文母语的语法和用词习惯。 52 | 53 | """ 54 | 55 | elif "zh2" == lang_opt: 56 | return f""" 57 | 请写一篇{novel_type}的小说{description},有50个章节。准确遵循以下格式: 58 | 59 | 以小说的名称开始。 60 | 接下来,写出第一章的大纲。大纲应描述小说的背景和开头。 61 | 根据你的提纲写出前三段,并说明小说的内容。用小说的风格来写,慢慢地设置场景。 62 | 写一个摘要,抓住这三段的关键信息。 63 | 最后,写出三个不同的指示,说明接下来要写什么,每个指示包含大约五句话。每个指示都应该提出一个可能的、有趣的故事的延续。 64 | 输出格式应遵循这些准则: 65 | 名称: <小说的名称> 66 | 概述: <第一章的大纲> 67 | 段落1: <第1段的内容> 68 | 段落2: <第2段的内容> 69 | 段落3: <第3段的内容> 70 | 总结: <摘要的内容>。 71 | 指令1: <指令1的内容> 72 | 指令2: <指令2的内容> 73 | 指令3:<指令3的内容> 74 | 75 | 请务必准确无误,并严格遵守输出格式。 76 | """ 77 | 78 | else: 79 | raise Exception(f"not supported language: {lang_opt}") -------------------------------------------------------------------------------- /recurrent_llm.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | import torch 6 | import random 7 | from sentence_transformers import util 8 | 9 | from utils import get_content_between_a_b 10 | from prompts.llm_query import get_input_text 11 | from global_config import lang_opt, llm_model_opt 12 | 13 | if "openai" == llm_model_opt: 14 | from utils.openai_util import get_api_response 15 | elif "vicuna" == llm_model_opt: 16 | from utils.vicuna_util import get_api_response 17 | elif "chatglm" == llm_model_opt: 18 | from utils.chatglm_util import get_api_response 19 | elif "baichuan" == llm_model_opt: 20 | from utils.baichuan_util import get_api_response 21 | elif "aquila" == llm_model_opt: 22 | from utils.aquila_util import get_api_response 23 | elif "falcon" == llm_model_opt: 24 | from utils.falcon_util import get_api_response 25 | else: 26 | raise Exception("not supported llm model name: {}".format(llm_model_opt)) 27 | 28 | 29 | class RecurrentLLM: 30 | 31 | def __init__(self, input, short_memory, long_memory, memory_index, embedder, model, tokenizer): 32 | print("AIWriter loaded by RecurrentLLM") 33 | self.input = input 34 | self.short_memory = short_memory 35 | self.long_memory = long_memory 36 | self.embedder = embedder 37 | self.model = model 38 | self.tokenizer = tokenizer 39 | if self.long_memory and not memory_index: 40 | self.memory_index = self.embedder.encode( 41 | self.long_memory, convert_to_tensor=True) 42 | self.output = {} 43 | 44 | def prepare_input(self, new_character_prob=0.1, top_k=2): 45 | 46 | input_paragraph = self.input["output_paragraph"] 47 | input_instruction = self.input["output_instruction"] 48 | 49 | instruction_embedding = self.embedder.encode( 50 | input_instruction, convert_to_tensor=True) 51 | 52 | # get the top 3 most similar paragraphs from memory 53 | 54 | memory_scores = util.cos_sim( 55 | instruction_embedding, self.memory_index)[0] 56 | top_k_idx = torch.topk(memory_scores, k=top_k)[1] 57 | top_k_memory = [self.long_memory[idx] for idx in top_k_idx] 58 | # combine the top 3 paragraphs 59 | input_long_term_memory = '\n'.join( 60 | [f"Related Paragraphs {i+1} :" + selected_memory for i, selected_memory in enumerate(top_k_memory)]) 61 | # randomly decide if a new character should be introduced 62 | if random.random() < 
new_character_prob: 63 | new_character_prompt = f"If it is reasonable, you can introduce a new character in the output paragrah and add it into the memory." 64 | else: 65 | new_character_prompt = "" 66 | 67 | input_text = get_input_text(lang_opt, self.short_memory, input_paragraph, input_instruction, input_long_term_memory, new_character_prompt) 68 | 69 | return input_text 70 | 71 | def parse_output(self, output): 72 | try: 73 | output_paragraph = get_content_between_a_b( 74 | 'Output Paragraph:', 'Output Memory', output) 75 | output_memory_updated = get_content_between_a_b( 76 | 'Updated Memory:', 'Output Instruction:', output) 77 | self.short_memory = output_memory_updated 78 | ins_1 = get_content_between_a_b( 79 | 'Instruction 1:', 'Instruction 2', output) 80 | ins_2 = get_content_between_a_b( 81 | 'Instruction 2:', 'Instruction 3', output) 82 | lines = output.splitlines() 83 | # content of Instruction 3 may be in the same line with I3 or in the next line 84 | if lines[-1] != '\n' and lines[-1].startswith('Instruction 3'): 85 | ins_3 = lines[-1][len("Instruction 3:"):] 86 | elif lines[-1] != '\n': 87 | ins_3 = lines[-1] 88 | 89 | output_instructions = [ins_1, ins_2, ins_3] 90 | assert len(output_instructions) == 3 91 | 92 | output = { 93 | "input_paragraph": self.input["output_paragraph"], 94 | "output_memory": output_memory_updated, # feed to human 95 | "output_paragraph": output_paragraph, 96 | "output_instruction": [instruction.strip() for instruction in output_instructions] 97 | } 98 | 99 | return output 100 | except: 101 | return None 102 | 103 | def step(self, response_file=None): 104 | 105 | prompt = self.prepare_input() 106 | 107 | print(prompt+'\n'+'\n') 108 | 109 | response = get_api_response(self.model, self.tokenizer, prompt) 110 | 111 | self.output = self.parse_output(response) 112 | while self.output == None: 113 | response = get_api_response(self.model, self.tokenizer, prompt) 114 | self.output = self.parse_output(response) 115 | if response_file: 116 | with open(response_file, 'a', encoding='utf-8') as f: 117 | f.write(f"Writer's output here:\n{response}\n\n") 118 | 119 | self.long_memory.append(self.input["output_paragraph"]) 120 | self.memory_index = self.embedder.encode( 121 | self.long_memory, convert_to_tensor=True) 122 | -------------------------------------------------------------------------------- /recurrentgpt.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | import torch 6 | import random 7 | from sentence_transformers import util 8 | 9 | from utils import get_content_between_a_b, get_api_response 10 | from prompts.chatgpt_query import get_input_text 11 | from global_config import lang_opt 12 | 13 | 14 | class RecurrentGPT: 15 | 16 | def __init__(self, input, short_memory, long_memory, memory_index, embedder): 17 | print("AIWriter loaded by RecurrentGPT") 18 | self.input = input 19 | self.short_memory = short_memory 20 | self.long_memory = long_memory 21 | self.embedder = embedder 22 | if self.long_memory and not memory_index: 23 | self.memory_index = self.embedder.encode( 24 | self.long_memory, convert_to_tensor=True) 25 | self.output = {} 26 | 27 | def prepare_input(self, new_character_prob=0.1, top_k=2): 28 | 29 | input_paragraph = self.input["output_paragraph"] 30 | input_instruction = self.input["output_instruction"] 31 | 32 | instruction_embedding = self.embedder.encode( 33 | input_instruction, convert_to_tensor=True) 34 | 35 | # get the top 3 most 
similar paragraphs from memory
36 | 
37 |         memory_scores = util.cos_sim(
38 |             instruction_embedding, self.memory_index)[0]
39 |         top_k_idx = torch.topk(memory_scores, k=top_k)[1]
40 |         top_k_memory = [self.long_memory[idx] for idx in top_k_idx]
41 |         # combine the top-k most similar paragraphs
42 |         input_long_term_memory = '\n'.join(
43 |             [f"Related Paragraphs {i+1} :" + selected_memory for i, selected_memory in enumerate(top_k_memory)])
44 |         # randomly decide if a new character should be introduced
45 |         if random.random() < new_character_prob:
46 |             new_character_prompt = f"If it is reasonable, you can introduce a new character in the output paragraph and add it into the memory."
47 |         else:
48 |             new_character_prompt = ""
49 | 
50 |         input_text = get_input_text(lang_opt, self.short_memory, input_paragraph, input_instruction, input_long_term_memory, new_character_prompt)
51 | 
52 |         return input_text
53 | 
54 |     def parse_output(self, output):
55 |         try:
56 |             output_paragraph = get_content_between_a_b(
57 |                 'Output Paragraph:', 'Output Memory', output)
58 |             output_memory_updated = get_content_between_a_b(
59 |                 'Updated Memory:', 'Output Instruction:', output)
60 |             self.short_memory = output_memory_updated
61 |             ins_1 = get_content_between_a_b(
62 |                 'Instruction 1:', 'Instruction 2', output)
63 |             ins_2 = get_content_between_a_b(
64 |                 'Instruction 2:', 'Instruction 3', output)
65 |             lines = output.splitlines()
66 |             # content of Instruction 3 may be in the same line with I3 or in the next line
67 |             if lines[-1] != '\n' and lines[-1].startswith('Instruction 3'):
68 |                 ins_3 = lines[-1][len("Instruction 3:"):]
69 |             elif lines[-1] != '\n':
70 |                 ins_3 = lines[-1]
71 | 
72 |             output_instructions = [ins_1, ins_2, ins_3]
73 |             assert len(output_instructions) == 3
74 | 
75 |             output = {
76 |                 "input_paragraph": self.input["output_paragraph"],
77 |                 "output_memory": output_memory_updated,  # feed to human
78 |                 "output_paragraph": output_paragraph,
79 |                 "output_instruction": [instruction.strip() for instruction in output_instructions]
80 |             }
81 | 
82 |             return output
83 |         except Exception:
84 |             return None
85 | 
86 |     def step(self, response_file=None):
87 | 
88 |         prompt = self.prepare_input()
89 | 
90 |         print(prompt+'\n'+'\n')
91 | 
92 |         response = get_api_response(None, None, prompt)  # the OpenAI helper ignores its model/tokenizer arguments
93 | 
94 |         self.output = self.parse_output(response)
95 |         while self.output is None:
96 |             response = get_api_response(None, None, prompt)
97 |             self.output = self.parse_output(response)
98 |         if response_file:
99 |             with open(response_file, 'a', encoding='utf-8') as f:
100 |                 f.write(f"Writer's output here:\n{response}\n\n")
101 | 
102 |         self.long_memory.append(self.input["output_paragraph"])
103 |         self.memory_index = self.embedder.encode(
104 |             self.long_memory, convert_to_tensor=True)
105 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | sentence-transformers
2 | openai
3 | 
4 | 
5 | bitsandbytes==0.39.0
6 | transformers @ git+https://github.com/huggingface/transformers.git
7 | peft @ git+https://github.com/huggingface/peft.git
8 | accelerate @ git+https://github.com/huggingface/accelerate.git
9 | 
10 | 
11 | llama-cpp-python @ git+https://github.com/abetlen/llama-cpp-python.git
12 | 
13 | 
14 | flagai
15 | bminf
16 | 
17 | 
18 | auto-gptq
19 | einops
--------------------------------------------------------------------------------
/utils/__init__.py:
-------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | import re 6 | from global_config import lang_opt, llm_model_opt 7 | 8 | if "openai" == llm_model_opt: 9 | from utils.openai_util import get_api_response 10 | elif "vicuna" == llm_model_opt: 11 | from utils.vicuna_util import get_api_response 12 | elif "chatglm" == llm_model_opt: 13 | from utils.chatglm_util import get_api_response 14 | elif "baichuan" == llm_model_opt: 15 | from utils.baichuan_util import get_api_response 16 | elif "aquila" == llm_model_opt: 17 | from utils.aquila_util import get_api_response 18 | elif "falcon" == llm_model_opt: 19 | from utils.falcon_util import get_api_response 20 | else: 21 | raise Exception("not supported llm model name: {}".format(llm_model_opt)) 22 | 23 | 24 | def get_content_between_a_b(a, b, text): 25 | if "en" == lang_opt: 26 | if "vicuna" == llm_model_opt: 27 | return re.search(f"{a}(.*?)\n(.*?){b}", text, re.DOTALL).group(1).strip() 28 | elif "openai" == llm_model_opt: 29 | return re.search(f"{a}(.*?)\n{b}", text, re.DOTALL).group(1).strip() 30 | elif llm_model_opt in ["chatglm", "baichuan", "aquila", "falcon"]: 31 | return re.search(f"{a}(.*?)\n(.*?){b}", text, re.DOTALL).group(1).strip() 32 | else: 33 | raise Exception( 34 | "not supported llm model name: {}".format(llm_model_opt)) 35 | 36 | elif lang_opt in ["zh1", "zh2"]: 37 | if "vicuna" == llm_model_opt: 38 | match = re.search(f"{a}(.*?)\n(.*?){b}", text, re.DOTALL) 39 | elif "openai" == llm_model_opt: 40 | match = re.search(f"{a}(.*?)\n{b}", text, re.DOTALL) 41 | elif llm_model_opt in ["chatglm", "baichuan", "aquila", "falcon"]: 42 | match = re.search(f"{a}(.*?)\n(.*?){b}", text, re.DOTALL) 43 | else: 44 | raise Exception( 45 | "not supported llm model name: {}".format(llm_model_opt)) 46 | 47 | if match: 48 | return match.group(1).strip() 49 | else: 50 | if "1" in a or "2" in a or "3" in a: 51 | a = ''.join(a.split(" ")) 52 | if "1" in b or "2" in b or "3" in b: 53 | b = "".join(b.split(" ")) 54 | 55 | if "vicuna" == llm_model_opt: 56 | match = re.search(f"{a}(.*?)\n(.*?){b}", text, re.DOTALL) 57 | elif "openai" == llm_model_opt: 58 | match = re.search(f"{a}(.*?)\n{b}", text, re.DOTALL) 59 | elif llm_model_opt in ["chatglm", "baichuan", "aquila", "falcon"]: 60 | match = re.search(f"{a}(.*?)\n(.*?){b}", text, re.DOTALL) 61 | else: 62 | raise Exception( 63 | "not supported llm model name: {}".format(llm_model_opt)) 64 | 65 | if match: 66 | return match.group(1).strip() 67 | else: 68 | # 处理找不到匹配内容的情况 69 | return "翻译时出现错误请重试" # 或者返回其他默认值或采取其他的处理方式 70 | else: 71 | raise Exception(f"not supported language: {lang_opt}") 72 | 73 | 74 | def get_init(init_text=None, text=None, response_file=None, model=None, tokenizer=None): 75 | """ 76 | init_text: if the title, outline, and the first 3 paragraphs are given in a .txt file, directly read 77 | text: if no .txt file is given, use init prompt to generate 78 | """ 79 | if not init_text: 80 | response = get_api_response(model, tokenizer, text) 81 | print("response: {}".format(response)) 82 | 83 | if response_file: 84 | with open(response_file, 'a', encoding='utf-8') as f: 85 | f.write(f"Init output here:\n{response}\n\n") 86 | else: 87 | with open(init_text, 'r', encoding='utf-8') as f: 88 | response = f.read() 89 | f.close() 90 | paragraphs = { 91 | "name": "", 92 | "Outline": "", 93 | "Paragraph 1": "", 94 | "Paragraph 2": "", 95 | "Paragraph 3": "", 96 | "Summary": "", 97 | "Instruction 1": "", 98 | 
"Instruction 2": "", 99 | "Instruction 3": "" 100 | } 101 | 102 | if "en" == lang_opt: 103 | paragraphs['name'] = get_content_between_a_b( 104 | 'Name:', 'Outline', response) 105 | 106 | paragraphs['Paragraph 1'] = get_content_between_a_b( 107 | 'Paragraph 1:', 'Paragraph 2:', response) 108 | paragraphs['Paragraph 2'] = get_content_between_a_b( 109 | 'Paragraph 2:', 'Paragraph 3:', response) 110 | paragraphs['Paragraph 3'] = get_content_between_a_b( 111 | 'Paragraph 3:', 'Summary', response) 112 | paragraphs['Summary'] = get_content_between_a_b( 113 | 'Summary:', 'Instruction 1', response) 114 | paragraphs['Instruction 1'] = get_content_between_a_b( 115 | 'Instruction 1:', 'Instruction 2', response) 116 | paragraphs['Instruction 2'] = get_content_between_a_b( 117 | 'Instruction 2:', 'Instruction 3', response) 118 | lines = response.splitlines() 119 | # content of Instruction 3 may be in the same line with I3 or in the next line 120 | if lines[-1] != '\n' and lines[-1].startswith('Instruction 3'): 121 | paragraphs['Instruction 3'] = lines[-1][len("Instruction 3:"):] 122 | elif lines[-1] != '\n': 123 | paragraphs['Instruction 3'] = lines[-1] 124 | # Sometimes it gives Chapter outline, sometimes it doesn't 125 | for line in lines: 126 | if line.startswith('Chapter'): 127 | paragraphs['Outline'] = get_content_between_a_b( 128 | 'Outline:', 'Chapter', response) 129 | break 130 | if paragraphs['Outline'] == '': 131 | paragraphs['Outline'] = get_content_between_a_b( 132 | 'Outline:', 'Paragraph', response) 133 | 134 | elif lang_opt in ["zh1", "zh2"]: 135 | paragraphs['name'] = get_content_between_a_b('名称:', '概述:', response) 136 | 137 | paragraphs['Paragraph 1'] = get_content_between_a_b( 138 | '段落 1:', '段落 2:', response) 139 | paragraphs['Paragraph 2'] = get_content_between_a_b( 140 | '段落 2:', '段落 3:', response) 141 | paragraphs['Paragraph 3'] = get_content_between_a_b( 142 | '段落 3:', '总结:', response) 143 | paragraphs['Summary'] = get_content_between_a_b( 144 | '总结:', '指令 1', response) 145 | paragraphs['Instruction 1'] = get_content_between_a_b( 146 | '指令 1:', '指令 2:', response) 147 | paragraphs['Instruction 2'] = get_content_between_a_b( 148 | '指令 2:', '指令 3:', response) 149 | lines = response.splitlines() 150 | # content of Instruction 3 may be in the same line with I3 or in the next line 151 | if lines[-1] != '\n' and lines[-1].startswith('Instruction 3'): 152 | paragraphs['Instruction 3'] = lines[-1][len("Instruction 3:"):] 153 | elif lines[-1] != '\n': 154 | paragraphs['Instruction 3'] = lines[-1] 155 | # Sometimes it gives Chapter outline, sometimes it doesn't 156 | for line in lines: 157 | if line.startswith('Chapter'): 158 | paragraphs['Outline'] = get_content_between_a_b( 159 | '概述:', 'Chapter', response) 160 | break 161 | if paragraphs['Outline'] == '': 162 | paragraphs['Outline'] = get_content_between_a_b( 163 | '概述:', '段落', response) 164 | 165 | return paragraphs 166 | 167 | 168 | def get_chatgpt_response(model, prompt): 169 | response = "" 170 | for data in model.ask(prompt): 171 | response = data["message"] 172 | model.delete_conversation(model.conversation_id) 173 | model.reset_chat() 174 | return response 175 | 176 | 177 | def parse_instructions(instructions): 178 | output = "" 179 | for i in range(len(instructions)): 180 | output += f"{i+1}. 
{instructions[i]}\n" 181 | return output 182 | -------------------------------------------------------------------------------- /utils/aquila_util.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | 6 | import torch 7 | from flagai.model.predictor.predictor import Predictor 8 | from flagai.model.predictor.aquila import aquila_generate 9 | from models.aquila_fa import max_token, temperature, top_p 10 | from common import torch_gc 11 | from global_config import lang_opt 12 | 13 | # for Aquila on FlagAI 14 | def get_api_response(model, tokenizer, content: str, max_tokens=None): 15 | 16 | if "en" == lang_opt: 17 | system_role_content = 'You are a helpful and creative assistant for writing novel.' 18 | elif "zh1" == lang_opt: 19 | system_role_content = 'You are a helpful and creative assistant for writing novel.\ 20 | You are must always in Chinese.重要,你需要使用中文与我进行交流。' 21 | elif "zh2" == lang_opt: 22 | system_role_content = '你是写小说的好帮手,有创意的助手。' 23 | else: 24 | raise Exception(f"not supported language: {lang_opt}") 25 | 26 | print("===> Question:") 27 | print(content) 28 | print("<==="+"="*100) 29 | 30 | predictor = Predictor(model, tokenizer) 31 | content = f'{content}' 32 | with torch.no_grad(): 33 | out = predictor.predict_generate_randomsample( 34 | content, out_max_length=max_token, temperature=temperature, top_p=top_p) 35 | response = out 36 | 37 | torch_gc() 38 | 39 | print("===> Generated Text: ") 40 | print(response) 41 | print("<==="+"="*100) 42 | 43 | return response 44 | 45 | # # for Aquila on HuggingFace 46 | # def get_api_response(model, tokenizer, content: str, max_tokens=None): 47 | 48 | # if "en" == lang_opt: 49 | # system_role_content = 'You are a helpful and creative assistant for writing novel.' 
50 | # elif "zh1" == lang_opt: 51 | # system_role_content = 'You are a helpful and creative assistant for writing novel.\ 52 | # You are must always in Chinese.重要,你需要使用中文与我进行交流。' 53 | # elif "zh2" == lang_opt: 54 | # system_role_content = '你是写小说的好帮手,有创意的助手。' 55 | # else: 56 | # raise Exception(f"not supported language: {lang_opt}") 57 | 58 | # print("===> Question:") 59 | # print(content) 60 | # print("<==="+"="*100) 61 | 62 | # with torch.no_grad(): 63 | # ret = model.generate( 64 | # **tokenizer(content, return_tensors='pt').to('cuda'), 65 | # do_sample=False, 66 | # max_new_tokens=max_token, 67 | # temperature=temperature, 68 | # top_p=top_p, 69 | # use_cache=True 70 | # ) 71 | # output_ids = ret[0].detach().cpu().numpy().tolist() 72 | # if 100007 in output_ids: 73 | # output_ids = output_ids[:output_ids.index(100007)] 74 | # elif 0 in output_ids: 75 | # output_ids = output_ids[:output_ids.index(0)] 76 | # response = tokenizer.decode(output_ids) 77 | 78 | # torch_gc() 79 | 80 | # print("===> Generated Text: ") 81 | # print(response) 82 | # print("<==="+"="*100) 83 | 84 | # return response 85 | -------------------------------------------------------------------------------- /utils/baichuan_util.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | from transformers import TextStreamer 6 | 7 | from models.baichuan_hf import max_token, temperature, top_p 8 | from common import torch_gc 9 | from global_config import lang_opt 10 | 11 | 12 | def get_api_response(model, tokenizer, content: str, max_tokens=None): 13 | 14 | if "en" == lang_opt: 15 | system_role_content = 'You are a helpful and creative assistant for writing novel.' 16 | elif "zh1" == lang_opt: 17 | system_role_content = 'You are a helpful and creative assistant for writing novel.\ 18 | You are must always in Chinese.重要,你需要使用中文与我进行交流。' 19 | elif "zh2" == lang_opt: 20 | system_role_content = '你是写小说的好帮手,有创意的助手。' 21 | else: 22 | raise Exception(f"not supported language: {lang_opt}") 23 | 24 | print("===> Question:") 25 | print(content) 26 | print("<==="+"="*100) 27 | 28 | streamer = TextStreamer(tokenizer, 29 | skip_prompt=True, 30 | skip_special_tokens=True 31 | ) 32 | 33 | # inputs = tokenizer(content, return_tensors='pt') 34 | inputs = tokenizer(":{}\n:".format(content), return_tensors='pt') 35 | # inputs = inputs.to('cuda') # UserWarning: You are calling .generate() with the `input_ids` being on a device type different than your model's device. `input_ids` is on cuda, whereas the model is on cpu. You may experience unexpected behaviors or slower generation. Please make sure that you have put `input_ids` to the correct device by calling for example input_ids = input_ids.to('cpu') before running `.generate()`. 
36 | inputs = inputs.to('cpu') 37 | generate_ids = model.generate(**inputs, 38 | max_new_tokens=max_token, 39 | top_p=top_p, 40 | temperature=temperature, 41 | repetition_penalty=1.1, 42 | streamer=streamer, 43 | ) 44 | response = tokenizer.decode( 45 | generate_ids.cpu()[0], skip_special_tokens=True) 46 | 47 | torch_gc() 48 | 49 | print("===> Generated Text: ") 50 | print(response) 51 | print("<==="+"="*100) 52 | 53 | return response 54 | -------------------------------------------------------------------------------- /utils/chatglm_util.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | 6 | from models.chatglm_hf import max_token, temperature, top_p 7 | from common import torch_gc 8 | from global_config import lang_opt 9 | 10 | 11 | def get_api_response(model, tokenizer, content: str, max_tokens=None): 12 | 13 | if "en" == lang_opt: 14 | system_role_content = 'You are a helpful and creative assistant for writing novel.' 15 | elif "zh1" == lang_opt: 16 | system_role_content = 'You are a helpful and creative assistant for writing novel.\ 17 | You are must always in Chinese.重要,你需要使用中文与我进行交流。' 18 | elif "zh2" == lang_opt: 19 | system_role_content = '你是写小说的好帮手,有创意的助手。' 20 | else: 21 | raise Exception(f"not supported language: {lang_opt}") 22 | 23 | print("===> Question:") 24 | print(content) 25 | print("<==="+"="*100) 26 | 27 | response, history = model.chat( 28 | tokenizer, 29 | content, 30 | history=[], 31 | max_length=max_token, 32 | temperature=temperature, 33 | top_p=top_p, 34 | ) 35 | 36 | torch_gc() 37 | 38 | print("===> Generated Text: ") 39 | print(response) 40 | print("<==="+"="*100) 41 | 42 | return response 43 | -------------------------------------------------------------------------------- /utils/falcon_util.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | from models.falcon_hf import max_token, temperature, top_p 6 | from common import torch_gc 7 | from global_config import lang_opt 8 | 9 | 10 | def get_api_response(model, tokenizer, content: str, max_tokens=None): 11 | 12 | if "en" == lang_opt: 13 | system_role_content = 'You are a helpful and creative assistant for writing novel.' 
14 | elif "zh1" == lang_opt: 15 | system_role_content = 'You are a helpful and creative assistant for writing novel.\ 16 | You are must always in Chinese.重要,你需要使用中文与我进行交流。' 17 | elif "zh2" == lang_opt: 18 | system_role_content = '你是写小说的好帮手,有创意的助手。' 19 | else: 20 | raise Exception(f"not supported language: {lang_opt}") 21 | 22 | print("===> Question:") 23 | print(content) 24 | print("<==="+"="*100) 25 | 26 | inputs = tokenizer(content, 27 | return_tensors='pt', 28 | return_token_type_ids=False, # ValueError: The following model_kwargs are not used by the model: ['token_type_ids'] (note: typos in the generate arguments will also show up in this list) 29 | ) 30 | inputs = inputs.to('cuda:0') 31 | output = model.generate(**inputs, 32 | max_new_tokens=max_token, 33 | top_p=top_p, 34 | temperature=temperature, 35 | repetition_penalty=1.1, 36 | # eos_token_id=tokenizer.eos_token_id, 37 | ) 38 | response = tokenizer.decode(output.cpu()[0], skip_special_tokens=True) 39 | 40 | torch_gc() 41 | 42 | print("===> Generated Text: ") 43 | print(response) 44 | print("<==="+"="*100) 45 | 46 | return response 47 | -------------------------------------------------------------------------------- /utils/openai_util.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | 6 | import openai 7 | 8 | from global_config import lang_opt 9 | 10 | 11 | def get_api_response(model, tokenizer, content: str, max_tokens=None): 12 | 13 | if "en" == lang_opt: 14 | system_role_content = 'You are a helpful and creative assistant for writing novel.' 15 | elif "zh1" == lang_opt: 16 | system_role_content = 'You are a helpful and creative assistant for writing novel.\ 17 | You are must always in Chinese.重要,你需要使用中文与我进行交流。' 18 | elif "zh2" == lang_opt: 19 | system_role_content = '你是写小说的好帮手,有创意的助手。' 20 | else: 21 | raise Exception(f"not supported language: {lang_opt}") 22 | 23 | response = openai.ChatCompletion.create( 24 | model='gpt-3.5-turbo', 25 | messages=[{ 26 | 'role': 'system', 27 | 'content': system_role_content 28 | }, { 29 | 'role': 'user', 30 | 'content': content, 31 | }], 32 | temperature=0.5, 33 | max_tokens=max_tokens 34 | ) 35 | 36 | return response['choices'][0]['message']['content'] 37 | 38 | 39 | -------------------------------------------------------------------------------- /utils/vicuna_util.py: -------------------------------------------------------------------------------- 1 | #!python 2 | # -*- coding: utf-8 -*- 3 | # @author: Kun 4 | 5 | 6 | from models.vicuna_bin import max_token, temperature, top_p 7 | from common import torch_gc 8 | from global_config import lang_opt 9 | 10 | 11 | def get_api_response(model, tokenizer, content: str, max_tokens=None): 12 | 13 | if "en" == lang_opt: 14 | system_role_content = 'You are a helpful and creative assistant for writing novel.' 
15 | elif "zh1" == lang_opt: 16 | system_role_content = 'You are a helpful and creative assistant for writing novel.\ 17 | You are must always in Chinese.重要,你需要使用中文与我进行交流。' 18 | elif "zh2" == lang_opt: 19 | system_role_content = '你是写小说的好帮手,有创意的助手。' 20 | else: 21 | raise Exception(f"not supported language: {lang_opt}") 22 | 23 | print("===> Question:") 24 | print(content) 25 | print("<==="+"="*100) 26 | 27 | content = content.encode() 28 | tokens = model.tokenize(content) 29 | 30 | output = b"" 31 | count = 0 32 | token_count = 10000 33 | top_k = 40 34 | repetition_penalty = 1.1 35 | for token in model.generate(tokens, 36 | top_k=top_k, 37 | top_p=top_p, 38 | temp=temperature, 39 | repeat_penalty=repetition_penalty): 40 | text = model.detokenize([token]) 41 | # print(text) 42 | output += text 43 | 44 | count += 1 45 | if count >= token_count or (token == model.token_eos()): 46 | break 47 | 48 | response = output.decode() 49 | # print("===> [vicuna][generate] response: {}".format(response)) 50 | 51 | torch_gc() 52 | 53 | print("===> Generated Text: ") 54 | print(response) 55 | print("<==="+"="*100) 56 | 57 | return response 58 | 59 | 60 | --------------------------------------------------------------------------------