├── .env.template ├── LICENSE ├── README.md ├── app.py ├── assets ├── 47bec94a.mp4 ├── ai-init.png ├── api_head.png ├── e1.jpeg ├── e2.png ├── model_head.png ├── nlsom.log ├── nlsom.png ├── nlsom.svg ├── role_play_head.png ├── ui.jpeg ├── validators-0.20.0.zip └── vqa_instance.png ├── env ├── __init__.py ├── prompt.py └── recommendation.py ├── nlsom.yaml ├── requirements.txt ├── setting.py ├── society ├── audio_recognition │ └── agent.py ├── body_reshaping │ └── agent.py ├── community.py ├── image_captioning │ └── agent.py ├── image_colorization │ └── agent.py ├── image_deblur │ └── agent.py ├── image_to_3d │ └── agent.py ├── object_detection │ └── agent.py ├── ocr │ └── agent.py ├── role_play │ └── agent.py ├── search │ └── agent.py ├── sentence_refine │ └── agent.py ├── skin_retouching │ └── agent.py ├── text_to_image │ └── agent.py ├── text_to_speech │ └── agent.py ├── text_to_video │ └── agent.py └── vqa │ └── agent.py └── utils.py /.env.template: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY = "" 2 | HUGGINGFACE_ACCESS_Tokens = "" 3 | REPLICATE_API_TOKEN = "" 4 | BING_SUBSCRIPTION_KEY = "" 5 | BING_SEARCH_URL = "" 6 | WOLFRAM_ALPHA_APPID = "" 7 | MODELSCOPE_GIT_TOKEN = "" 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 mczhuge 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | #

`Natural Language-Based Societies of Mind`

2 | ![overview](assets/nlsom.svg) 3 | > What magical trick makes us intelligent? The trick is that there is no trick. The power of intelligence stems from our vast diversity, not from any single, perfect principle. — Marvin Minsky, The Society of Mind, p. 308 4 | 5 | [![arXiv](https://img.shields.io/badge/arXiv-Paper-.svg)](https://arxiv.org/pdf/2305.17066.pdf) 6 | [![GitHub license](https://img.shields.io/badge/License-MIT-orange.svg)](https://github.com/mczhuge/NLSOM/blob/main/LICENSE) 7 | [![Hits](https://hits.seeyoufarm.com/api/count/incr/badge.svg?url=https%3A%2F%2Fgithub.com%2Fmczhuge%2FNLSOM&count_bg=%23543DC8&title_bg=%23555555&icon=awesomelists.svg&icon_color=%23E7E7E7&title=hits&edge_flat=false)](https://hits.seeyoufarm.com) 8 | 9 | ## ✨ Introduction 10 | We introduce the **Natural Language-Based Societies of Mind (NLSOM)** concept, which contains societies and communities of agents. 11 | 12 | #### 🔥 News: 13 | - NLSOM has been accepted by CVMJ 2025. 14 | - NLSOM won the **Best Paper Award** at the [NeurIPS 2023 Ro-FoMo Workshop](https://sites.google.com/view/r0-fomo/accepted-papers?authuser=0)! 15 | - [Dylan R. Ashley](https://dylanashley.io) will give a presentation on NLSOM at the [NeurIPS RO-FoMo](https://sites.google.com/view/r0-fomo/accepted-papers?authuser=0) workshop. See our [poster](https://metauto.ai/images/nlsom-v9.pdf). 16 | - [This position paper](https://arxiv.org/pdf/2305.17066.pdf) marks the beginning. Our vision continues to unfold and grow stronger! 17 | - **We finished this repo in early May, but it was released 7 months later.** 18 | 19 | #### 1. Concepts: 20 | - Agents can be LLMs, NN-based experts, APIs, or role-players. They all communicate in natural language. 21 | - To solve tasks, these agents use a collaborative "Mindstorm" process involving mutual interviews. 22 | - Additional components for NLSOM can be easily added in a modular way. 23 | -
24 | More insights (collapsed section; illustrative figures omitted)
43 | 44 | #### 2. About this repo: 45 | This project is the **technical extension** of the original [NLSOM paper](https://arxiv.org/pdf/2305.17066.pdf), and provides: 46 | 47 | - **🧰 Recommendation**: Autonomously selects communities and agents to form a self-organized NLSOM for solving the specified task. 48 | - **🧠 Mindstorm**: Multiple agents (models or APIs) collaborate to solve tasks together more efficiently. 49 | - **💰 Reward**: Rewards are given to all agents involved. 50 | 51 | #### 3. Features: 52 | - [x] Easy to manage: simply change the template to organize your NLSOM for different domains. 53 | - [x] Easy to extend: customize your own communities and agents (currently 16 communities and 34 agents; see [society](https://github.com/mczhuge/NLSOM/tree/main/society) and the sketch below). 54 | - [x] Reward design: provides a (rough) reward mechanism that you can easily upgrade to a more refined version. 55 | - [x] Elegant UI: provides an interface that supports diverse file sources (image, text, audio, video, etc.). 56 | 57 | 58 |
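To make the extension point concrete, here is a minimal sketch that mirrors the pattern used by the existing agents (compare `society/audio_recognition/agent.py`): a class whose `inference` method is tagged by the small `prompts` decorator and saved as `society/<your_community>/agent.py`. The community name `echo_demo` and the class `EchoAgent` are hypothetical placeholders, not part of the repo.

```python
# society/echo_demo/agent.py -- a hypothetical minimal agent following the existing pattern.

def prompts(name, description):
    # Same helper as in the existing agents: attaches tool metadata to the method.
    def decorator(func):
        func.name = name
        func.description = description
        return func
    return decorator


class EchoAgent:
    def __init__(self, device="cpu"):
        # Real agents initialize their model or API client here; app.py passes the device in.
        self.device = device

    @prompts(name="EchoAgent",
             description="useful when you want to repeat a piece of text back to the user. "
                         "The input to this tool should be a string.")
    def inference(self, text):
        # Replace this with a real model or API call.
        return f"Echo: {text}"
```

For the app to discover the new community, its folder only needs to live under `society/` (the recommender lists `society/` at startup), and the class should also be exported from `society/community.py` so that `app.py` can instantiate it by name.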
59 | 60 |
61 | 62 | 63 | 64 | 65 | ## 💾 Usage 66 | 67 | ### 1. Install 68 | 69 | Choose from three different installation methods to find the one that best fits your needs. 70 | 71 | 1. CONDA: ``conda env create -n nlsom -f nlsom.yaml`` 72 | 73 | 2. PIP: ``conda create -n nlsom python=3.8`` and then ``pip install -r requirements.txt`` 74 | 75 |
3. Step-by-step installation (recommended, and gives you more control) 76 |

77 | 78 | ```bash 79 | # [Set Conda Env] 80 | conda create -n nlsom python=3.8 81 | conda install pytorch==1.10.1 torchvision==0.11.2 torchaudio==0.10.1 -c pytorch 82 | pip install pandas==1.4.3 83 | # [Set LangChain, OpenAI] 84 | pip install langchain==0.0.158 85 | pip install sqlalchemy==2.0.12 86 | pip install openai 87 | pip install colorama 88 | # [Set Streamlit] 89 | cd assets && unzip validators-0.20.0.zip 90 | cd validators-0.20.0 91 | python setup.py build 92 | python setup.py install 93 | pip install streamlit==1.22.0 94 | pip install streamlit_chat==0.0.2.2 95 | pip install soundfile 96 | # [Set Huggingface/transformers] 97 | pip install transformers==4.29.2 98 | pip install accelerate==0.19.0 99 | # [Set Search] 100 | pip install wolframalpha 101 | pip install wikipedia 102 | pip install arxiv 103 | # [Set Modelscope] 104 | pip install modelscope==1.6.0 105 | python3 -m pip install nvidia-cudnn-cu11==8.6.0.163 tensorflow==2.12.* 106 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CONDA_PREFIX/lib/:$CUDNN_PATH/lib 107 | python3 -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))" 108 | pip install modelscope[multi-modal] 109 | pip install decord==0.6.0 110 | pip install fairseq 111 | pip install librosa 112 | pip install setuptools==59.5.0 113 | pip install tensorboardX 114 | pip install open_clip_torch 115 | # [Set OCR] 116 | pip install easyocr 117 | # [Set Text-to-Video] 118 | pip install replicate==0.8.3 119 | # [Set Image-to-3D] 120 | pip install trimesh 121 | pip3 install pymcubes 122 | # [Set TTS] - not recommended due to environmental conflicts 123 | pip install TTS 124 | pip install protobuf==3.20.3 125 | ``` 126 | 127 |
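Once the environment is set up, a quick sanity check can save debugging time later; this is only a sketch, assuming you installed the stack above inside the `nlsom` environment:

```python
# Optional sanity check for the core stack (run inside the nlsom conda env).
from importlib.metadata import version

import torch

print("torch:", torch.__version__, "| CUDA available:", torch.cuda.is_available())
for pkg in ["transformers", "modelscope", "langchain", "streamlit"]:
    print(pkg, version(pkg))
```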

128 |
129 | 130 |
Optional: manage the checkpoints directory 131 |

132 | 133 | * Create the checkpoints dir 134 | ``` 135 | mkdir checkpoints && cd checkpoints 136 | mkdir huggingface 137 | mkdir modelscope 138 | ``` 139 | 140 | * Change Huggingface's setting 141 | ```bash 142 | >>> import transformers 143 | >>> print(transformers.__file__) 144 | # Get the path: {YOUR_ANACONDA_PATH}/envs/nlsom/lib/python3.8/site-packages/transformers/__init__.py 145 | ``` 146 | 147 | Open the ``{YOUR_ANACONDA_PATH}/envs/nlsom/lib/python3.8/site-packages/transformers/utils/hub.py`` and change the line: 148 | ``` 149 | torch_cache_home = os.getenv("TORCH_HOME", os.path.join(os.getenv("XDG_CACHE_HOME", "{YOUR_NLSOM_PATH}/checkpoints"), "torch")) 150 | hf_cache_home = os.path.expanduser( 151 | os.getenv("HF_HOME", os.path.join(os.getenv("XDG_CACHE_HOME", "{YOUR_NLSOM_PATH}/checkpoints"), "huggingface")) 152 | ) 153 | ``` 154 | 155 | * Similarly, the modelscope's setting 156 | 157 | ```bash 158 | >>> import modelscope 159 | >>> print(modelscope.__file__) 160 | # Get the path: ${YOUR_ANACONDA_PATH}/envs/nlsom/lib/python3.8/site-packages/modelscope/__init__.py 161 | ``` 162 | 163 | Open ``{YOUR_ANACONDA_PATH}/envs/nlsom/lib/python3.8/site-packages/modelscope/utils/file_utils.py`` and change the line: 164 | ``` 165 | default_cache_dir = Path.home().joinpath('{YOUR_NLSOM_PATH}/checkpoints', 'modelscope') 166 | ``` 167 | 168 |
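If you would rather not patch files inside `site-packages`, an alternative worth trying is to point both caches at the checkpoints directory through environment variables before the libraries are first imported. Treat this as a sketch to verify: `HF_HOME` is documented by Hugging Face, while `MODELSCOPE_CACHE` is an assumption that may differ across ModelScope versions.

```python
# Sketch: redirect the model caches via environment variables instead of editing library sources.
import os

os.environ["HF_HOME"] = "{YOUR_NLSOM_PATH}/checkpoints/huggingface"          # Hugging Face cache root
os.environ["MODELSCOPE_CACHE"] = "{YOUR_NLSOM_PATH}/checkpoints/modelscope"  # assumed ModelScope cache root

import transformers  # import only after the variables are set
import modelscope
```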

169 |
170 | 171 | ### 2. APIs 172 | 173 | Please fill in the API keys in ``.env.template``. The OpenAI API key is mandatory, while the others depend on your specific requirements. Then run ``mv .env.template .env``. 174 | 175 | ### 3. App 176 | 177 | ```bash 178 | streamlit run app.py 179 | ``` 180 | 181 | 182 | ## 🧸 Demo 183 | 184 | #### 1. Focus more on Mindstorm 185 | 186 |
187 | Demo 1: Model Collaboration (Multimodal Agents) (collapsed demo section; media omitted)
253 | Demo 2: Collaborative API Usages (Introduce "AGI") (collapsed demo section; media omitted)
320 | Demo 3: Collaborative Role-Play (The Three Kingdoms) (collapsed demo section; media omitted)
382 | 383 | #### 2. Focus more on NLSOM 384 | 385 | 386 |
387 | Demo 4: Society of Mind (Automatic Task-Solving) (collapsed demo section; media omitted)
448 | 449 | 450 | ## ☑️ TODO? 451 | We adopt two ways to conduct NLSOM and Mindstorm: 452 | 453 | **v1.0:** 📋 Preliminary Experiments: In the original [paper](https://arxiv.org/pdf/2305.17066.pdf), NLSOM and Mindstorm is driven by hardcodes. 454 | 455 | **v2.0:** 📋 In this version, NLSOM is self-organized, and Mindstorm happens automatically. 456 | 457 | **v3.0:** 🎯 Future Work: 1) introducing RL; 2) Economy of Minds; 3) Self-Improvement; etc. 458 | 459 | ## 💌 Acknowledgments 460 | 461 | This project utilizes parts of code from the following open-source repositories: [langchain](https://github.com/hwchase17/langchain), [BabyAGI](https://github.com/yoheinakajima/babyagi), [TaskMatrix](https://github.com/microsoft/TaskMatrix), [DataChad](https://github.com/gustavz/DataChad), [streamlit](https://github.com/streamlit/streamlit). We also thank great AI platforms and all the used models or APIs: [huggingface](https://github.com/huggingface/transformers), [modelscope](https://github.com/modelscope/modelscope). 462 | 463 | 464 | ## :black_nib: Citation 465 | 466 | References to cite: 467 | 468 | ``` 469 | @article{zhuge2023mindstorms, 470 | title={Mindstorms in Natural Language-Based Societies of Mind}, 471 | author={Zhuge, Mingchen and Liu, Haozhe and Faccio, Francesco and Ashley, Dylan R and Csord{\'a}s, R{\'o}bert and Gopalakrishnan, Anand and Hamdi, Abdullah and Hammoud, Hasan and Herrmann, Vincent and Irie, Kazuki and Kirsch, Louis and Li, Bing and Li, Guohao and Liu, Shuming and Mai, Jinjie and Pi{\k{e}}kos, Piotr and Ramesh, Aditya and Schlag, Imanol and Shi, Weimin and Stani{\'c}, Aleksandar and Wang, Wenyi and Wang, Yuhui and Xu, Mengmeng and Fan, Deng-Ping and Ghanem, Bernard and Schmidhuber, J{\"u}rgen}, 472 | journal={arXiv preprint arXiv:2305.17066}, 473 | year={2023} 474 | } 475 | ``` 476 | 477 | 478 | 479 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from streamlit_chat import message 3 | from env.recommendation import Organize 4 | from pathlib import Path 5 | import ast 6 | import re 7 | from colorama import Fore, Style 8 | from PIL import Image 9 | import os 10 | import soundfile as sf 11 | import shutil 12 | import torch 13 | 14 | from society.community import * 15 | from env.prompt import AI_SOCIETY 16 | 17 | from langchain.agents.tools import Tool 18 | 19 | os.makedirs('data', exist_ok=True) 20 | 21 | from setting import ( 22 | APP_NAME, 23 | AUTHENTICATION_HELP, 24 | OPENAI_HELP, 25 | HUGGINGFACE_HELP, 26 | BINGSEARCH_HELP, 27 | WOLFRAMALPHA_HELP, 28 | REPLICATE_HELP, 29 | PAGE_ICON, 30 | REPO_URL, 31 | USAGE_HELP, 32 | ) 33 | 34 | from utils import ( 35 | authenticate, 36 | delete_uploaded_file, 37 | generate_response, 38 | logger, 39 | save_uploaded_file, 40 | ) 41 | 42 | 43 | # Page options and header 44 | st.set_option("client.showErrorDetails", True) 45 | st.set_page_config( 46 | page_title=APP_NAME, page_icon=PAGE_ICON, initial_sidebar_state="expanded" 47 | ) 48 | 49 | LOGO_FILE = os.path.join("assets", "nlsom.png") 50 | st.title(':orange[Mindstorms] in NL:blue[SOM]') 51 | st.text("1️⃣ Enter API keys.") 52 | st.text("2️⃣ Upload the task/file. 
") 53 | st.text("3️⃣ System organize an NLSOM and conduct mindstorms.") 54 | st.text("4️⃣ Sovle the task.") 55 | 56 | 57 | SESSION_DEFAULTS = { 58 | "past": [], 59 | "usage": {}, 60 | "device": torch.device('cuda' if torch.cuda.is_available() else 'cpu'), 61 | "chat_history": [], 62 | "generated": [], 63 | "data_name": [], 64 | "language": "English", 65 | "models": {}, 66 | "communities": {}, 67 | "agents": {}, 68 | "load_dict": {}, 69 | "data_source": [], 70 | "uploaded_file": None, 71 | "auth_ok": False, 72 | "openai_api_key": None, 73 | "huggingface_api_key": None, 74 | "bingsearch_api_key": None, 75 | "wolframalpha_api_key": None, 76 | "replicate_api_key": None, 77 | } 78 | 79 | 80 | # Initialise session state variables 81 | for k, v in SESSION_DEFAULTS.items(): 82 | if k not in st.session_state: 83 | st.session_state[k] = v 84 | 85 | # Move .env to .streamlit/secrets.toml 86 | os.makedirs(".streamlit", exist_ok=True) 87 | shutil.copyfile(".env", ".streamlit/secrets.toml") 88 | 89 | # Sidebar with Authentication 90 | # Only start App if authentication is OK 91 | with st.sidebar: 92 | 93 | st.title("🔗 API Pool", help=AUTHENTICATION_HELP) 94 | with st.form("authentication"): 95 | 96 | openai_api_key = st.text_input( 97 | "🕹 OpenAI API", 98 | type="password", 99 | help=OPENAI_HELP, 100 | placeholder="This field is mandatory", 101 | ) 102 | huggingface_api_key = st.text_input( 103 | "🕹 HuggingFace API", 104 | type="password", 105 | help=HUGGINGFACE_HELP, 106 | placeholder="This field is optional", 107 | ) 108 | bingsearch_api_key = st.text_input( 109 | "🕹 BingSearch API", 110 | type="password", 111 | help=BINGSEARCH_HELP, 112 | placeholder="This field is optional", 113 | ) 114 | wolframalpha_api_key = st.text_input( 115 | "🕹 WolframAlpha API", 116 | type="password", 117 | help=WOLFRAMALPHA_HELP, 118 | placeholder="This field is optional", 119 | ) 120 | replicate_api_key = st.text_input( 121 | "🕹 Replicate API", 122 | type="password", 123 | help=REPLICATE_HELP, 124 | placeholder="This field is optional", 125 | ) 126 | 127 | language = st.selectbox( 128 | "📖 Language", 129 | ('English', '中文')) 130 | 131 | st.session_state["language"] = language 132 | 133 | submitted = st.form_submit_button("Submit") 134 | if submitted: 135 | #authenticate(openai_api_key, activeloop_token, activeloop_org_name) 136 | authenticate(openai_api_key) 137 | 138 | REPO_URL = "https://github.com/AI-Initiative-KAUST/NLSOM" 139 | st.info(f"🟢 Github Page: [KAUST-AINT-NLSOM]({REPO_URL})") 140 | st.image(LOGO_FILE) 141 | if not st.session_state["auth_ok"]: 142 | st.stop() 143 | 144 | # Clear button to reset all chat communication 145 | clear_button = st.button("Clear Conversation", key="clear") 146 | 147 | if clear_button: 148 | # Resets all chat history related caches 149 | # delete_uploaded_file(st.session_state["data_source"]) 150 | st.session_state["past"] = [] 151 | st.session_state["usage"] = {} 152 | st.session_state["generated"] = [] 153 | st.session_state["chat_history"] = [] 154 | st.session_state["data_name"] = [] 155 | st.session_state["models"] = {} 156 | st.session_state["communities"] = {} 157 | st.session_state["agents"] = {} 158 | st.session_state["load_dict"] = {} 159 | st.session_state["data_source"] = [] 160 | st.session_state["uploaded_file"] = None 161 | 162 | # file upload and data source inputs 163 | uploaded_file = st.file_uploader("Upload a file") 164 | data_source = st.text_input( 165 | "Enter any data source", 166 | placeholder="Any path or URL pointing to a file", 167 | ) 168 | 169 | def 
get_agent_class(file_path): 170 | with open(file_path, 'r') as f: 171 | tree = ast.parse(f.read()) 172 | classes = [] 173 | for node in ast.iter_child_nodes(tree): 174 | if isinstance(node, ast.ClassDef): 175 | name = node.name 176 | classes.append(name) 177 | return classes 178 | 179 | 180 | def traverse_dir(community): 181 | results = [] 182 | dir_path = "./society/"+community+"/" 183 | for root, dirs, files in os.walk(dir_path): 184 | for file in files: 185 | if file == "agent.py": #file.endswith('.py'): 186 | file_path = os.path.join(root, file) 187 | classes = get_agent_class(file_path) 188 | results.append(classes) 189 | return results[0] 190 | 191 | 192 | def load_candidate(candidate_list, AI_SOCIETY): 193 | 194 | 195 | device = st.session_state["device"] 196 | print(f"Current device: {device}") 197 | 198 | for community in candidate_list: 199 | agents = traverse_dir(community.strip()) 200 | for agent in agents: 201 | st.session_state["load_dict"][str(agent)] = device 202 | if str(community).strip() not in st.session_state["agents"].keys(): 203 | st.session_state["agents"][str(community).strip()] = [str(agent)] 204 | else: 205 | st.session_state["agents"][str(community).strip()].append(str(agent)) 206 | 207 | st.session_state["generated"].append("We load the recommended AI communities with their their corresponding agents:\n{}".format(st.session_state["agents"])) 208 | 209 | st.session_state["chat_history"].append("We load the recommended AI communities with their their corresponding agents:\n{}".format(st.session_state["agents"])) 210 | print(Fore.BLUE + "We load the recommended AI communities with their their corresponding agents:\n{}".format(st.session_state["agents"]), end='') 211 | print(Style.RESET_ALL) 212 | for class_name, device in st.session_state["load_dict"].items(): 213 | st.session_state["models"][class_name] = globals()[class_name](device=device) 214 | 215 | st.session_state["tools"] = [] 216 | for instance in st.session_state["models"].values(): 217 | for e in dir(instance): 218 | if e.startswith('inference'): 219 | func = getattr(instance, e) 220 | st.session_state["tools"].append(Tool(name=func.name, description=func.description, func=func)) 221 | 222 | 223 | 224 | 225 | # Only support one file currently 226 | 227 | if uploaded_file and uploaded_file != st.session_state["uploaded_file"]: 228 | 229 | logger.info(f"Uploaded file: '{uploaded_file.name}'") 230 | st.session_state["uploaded_file"] = uploaded_file 231 | data_source = save_uploaded_file(uploaded_file) 232 | filename = "data/" + uploaded_file.name 233 | 234 | # image source 235 | if len(re.findall(r'\b([-\w]+\.(?:jpg|png|jpeg|bmp|svg|ico|tif|tiff|gif|JPG))\b', filename)) != 0: 236 | filetype = "image" 237 | img = Image.open(filename) 238 | width, height = img.size 239 | ratio = min(512/ width, 512/ height) 240 | img = img.resize((round(width * ratio), round(height * ratio))) 241 | img = img.convert('RGB') 242 | img.save(filename, "PNG") 243 | 244 | # audio source 245 | if len(re.findall(r'\b([-\w]+\.(?:wav|flac|mp3))\b', filename)) != 0: 246 | filetype = "audio" 247 | 248 | # video source 249 | if len(re.findall(r'\b([-\w]+\.(?:avi|mov|flv|mp4|wmv))\b', filename)) != 0: 250 | filetype = "video" 251 | 252 | #data_name = st.session_state["data_name"] = f"![](file={filename})*{filename}*" 253 | data_name = st.session_state["data_name"] = filename 254 | st.session_state["generated"].append(f"Receive a {filetype} file, it stored in {data_name}") 255 | st.session_state["chat_history"].append((data_name, f"Receive 
the {filetype} file, it stored in {data_name}")) 256 | st.session_state["data_source"] = data_source 257 | 258 | 259 | # container for chat history 260 | response_container = st.container() 261 | # container for text box 262 | container = st.container() 263 | 264 | # As streamlit reruns the whole script on each change 265 | # it is necessary to repopulate the chat containers 266 | with container: 267 | with st.form(key="prompt_input", clear_on_submit=True): 268 | user_input = st.text_area("🎯 Your task:", key="input", height=100) 269 | submit_button = st.form_submit_button(label="Send") 270 | 271 | if submit_button and user_input: 272 | 273 | st.session_state["past"].append(user_input) 274 | community = Organize(user_input) 275 | if st.session_state["data_name"] != []: 276 | user_input = st.session_state["data_name"] + ", " + user_input 277 | print(Fore.BLUE + f"User Input: {user_input}", end='') 278 | print(Style.RESET_ALL) 279 | community = community.replace("[", "").replace("]", "").replace("'", "").split(",") 280 | num_icon = ["1️⃣","2️⃣","3️⃣","4️⃣","5️⃣","6️⃣","7️⃣","8️⃣","9️⃣","🔟"] 281 | recommendation = "\n" 282 | for i in range(len(community)): 283 | recommendation += (num_icon[i] + community[i]) + "\n" 284 | st.session_state["generated"].append(f"Based on this objective, I recommend that NLSOM includes the following AI communities: {recommendation}") 285 | print(Fore.BLUE + f"Based on this objective, I recommend that NLSOM includes the following AI communities: {recommendation}", end='') 286 | print(Style.RESET_ALL) 287 | st.session_state["chat_history"].append(f"Based on this objective, I recommend that NLSOM includes the following AI communities: {recommendation}") 288 | load_candidate(community, AI_SOCIETY) 289 | 290 | responce = generate_response(user_input, st.session_state["tools"], st.session_state["chat_history"]) 291 | 292 | print("###"*20) 293 | print(responce) 294 | print("###"*20) 295 | 296 | review, output, reward = responce.split("\n")[0], responce.split("\n")[1], responce.split("\n")[2] 297 | if "Analyze the employed agents" in review: # The review was unsuccessful, possibly due to the ongoing process or the brevity of the content. 
298 | review = review.split("Analyze the employed agents")[0].strip("[").strip("]") 299 | 300 | st.session_state["generated"].append(review) 301 | st.session_state["generated"].append(output) 302 | st.session_state["generated"].append(reward) 303 | st.session_state["generated"].append(responce) 304 | 305 | if st.session_state["generated"]: 306 | with response_container: 307 | for i in range(len(st.session_state["past"])): 308 | #print(st.session_state["past"]) 309 | message(st.session_state["past"][i], is_user=True, key=str(i) + "_user") 310 | 311 | for i in range(len(st.session_state["generated"])): 312 | #print(st.session_state["generated"]) 313 | if i==0: continue 314 | message(st.session_state["generated"][i], key=str(i)) 315 | 316 | image_parse = re.findall(r'\b([-\w]+\.(?:jpg|png|jpeg|bmp|svg|ico|tif|tiff|gif|JPG))\b', st.session_state["generated"][i]) 317 | if image_parse != []: 318 | image = Image.open(os.path.join("data", image_parse[-1])) 319 | st.image(image, caption=image_parse[-1]) 320 | 321 | audio_parse = re.findall(r'\b([-\w]+\.(?:wav|flac|mp3))\b', st.session_state["generated"][i]) 322 | if audio_parse != []: 323 | audio_format = audio_parse[-1].split(".")[-1] 324 | 325 | if audio_format != "wav": 326 | audio_bytes, samplerate = sf.read(os.path.join("data", audio_parse[-1])) 327 | audio_format = "wav" 328 | st.audio(audio_bytes, format=f"audio/{audio_format}", sample_rate=samplerate) 329 | else: 330 | audio_file = open(os.path.join("data", audio_parse[-1])) 331 | audio_bytes = audio_file.read(os.path.join("data", audio_parse[-1])) 332 | st.audio(audio_bytes, format=f"audio/{audio_format}") 333 | 334 | video_parse = re.findall(r'\b([-\w]+\.(?:avi|mov|flv|mp4|wmv))\b', st.session_state["generated"][i]) 335 | if video_parse != []: 336 | video_file = open(os.path.join("data", video_parse[-1]), "rb") 337 | video_bytes = video_file.read() 338 | st.video(video_bytes) 339 | 340 | 341 | # Usage sidebar with total used tokens and costs 342 | # We put this at the end to be able to show usage starting with the first response 343 | with st.sidebar: 344 | if st.session_state["usage"]: 345 | st.divider() 346 | st.title("Usage", help=USAGE_HELP) 347 | col1, col2 = st.columns(2) 348 | col1.metric("Total Tokens", st.session_state["usage"]["total_tokens"]) 349 | col2.metric("Total Costs in $", st.session_state["usage"]["total_cost"]) 350 | -------------------------------------------------------------------------------- /assets/47bec94a.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/metauto-ai/NLSOM/e33244a8b0f8e3f6ab05b8b06ad3547e8df6575a/assets/47bec94a.mp4 -------------------------------------------------------------------------------- /assets/ai-init.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/metauto-ai/NLSOM/e33244a8b0f8e3f6ab05b8b06ad3547e8df6575a/assets/ai-init.png -------------------------------------------------------------------------------- /assets/api_head.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/metauto-ai/NLSOM/e33244a8b0f8e3f6ab05b8b06ad3547e8df6575a/assets/api_head.png -------------------------------------------------------------------------------- /assets/e1.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/metauto-ai/NLSOM/e33244a8b0f8e3f6ab05b8b06ad3547e8df6575a/assets/e1.jpeg 
-------------------------------------------------------------------------------- /assets/e2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/metauto-ai/NLSOM/e33244a8b0f8e3f6ab05b8b06ad3547e8df6575a/assets/e2.png -------------------------------------------------------------------------------- /assets/model_head.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/metauto-ai/NLSOM/e33244a8b0f8e3f6ab05b8b06ad3547e8df6575a/assets/model_head.png -------------------------------------------------------------------------------- /assets/nlsom.log: -------------------------------------------------------------------------------- 1 | 2 | ◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍ 3 | 4 | ███╗ ██╗ ██╗ ███████╗ ██████╗ ███╗ ███╗ 5 | ████╗ ██║ ██║ ██╔════╝ ██╔═══██╗ ████╗ ████║ 6 | ██╔██╗ ██║ ██║ ███████╗ ██║ ██║ ██╔████╔██║ 7 | ██║╚██╗██║ ██║ ╚════██║ ██║ ██║ ██║╚██╔╝██║ 8 | ██║ ╚████║ ███████╗ ███████║ ╚██████╔╝ ██║ ╚═╝ ██║ 9 | ╚═╝ ╚═══╝ ╚══════╝ ╚══════╝ ╚═════╝ ╚═╝ ╚═╝ 10 | 🔵🟠🔴 Everyone can build different Natural Languaged-Based Society of Mind (NLSOMs) efficiently! 🟣🟢🟡 11 | 🟣🟢🟡 This project is a technical extension for "Mindstorms in Natural Language-Based Societies of Mind". 🔵🟠🔴 12 | 13 | ◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍◍ -------------------------------------------------------------------------------- /assets/nlsom.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/metauto-ai/NLSOM/e33244a8b0f8e3f6ab05b8b06ad3547e8df6575a/assets/nlsom.png -------------------------------------------------------------------------------- /assets/role_play_head.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/metauto-ai/NLSOM/e33244a8b0f8e3f6ab05b8b06ad3547e8df6575a/assets/role_play_head.png -------------------------------------------------------------------------------- /assets/ui.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/metauto-ai/NLSOM/e33244a8b0f8e3f6ab05b8b06ad3547e8df6575a/assets/ui.jpeg -------------------------------------------------------------------------------- /assets/validators-0.20.0.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/metauto-ai/NLSOM/e33244a8b0f8e3f6ab05b8b06ad3547e8df6575a/assets/validators-0.20.0.zip -------------------------------------------------------------------------------- /assets/vqa_instance.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/metauto-ai/NLSOM/e33244a8b0f8e3f6ab05b8b06ad3547e8df6575a/assets/vqa_instance.png -------------------------------------------------------------------------------- /env/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/metauto-ai/NLSOM/e33244a8b0f8e3f6ab05b8b06ad3547e8df6575a/env/__init__.py -------------------------------------------------------------------------------- /env/prompt.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # NLSOM AI Community 
Candidate 4 | 5 | AI_SOCIETY = [] 6 | 7 | for dirname in os.listdir("society"): 8 | if "__" in dirname or "." in dirname: 9 | continue 10 | else: 11 | AI_SOCIETY.append(dirname) 12 | 13 | # NLSOM Recommendation 14 | 15 | ORGANIZING_EXAMPLE = [ 16 | {"objective": "Describe the image and generate another similar one", "society": str(AI_SOCIETY), "organizing": ["image_captioning", "text_to_image"]}, 17 | {"objective": "Explain the audio and search for related information", "society": str(AI_SOCIETY), "organizing": ["audio_to_text", "search"]}, 18 | {"objective": "Generate a beautiful yellow clothe image and change the clothe of the woman in the picture, then describe it", "society": str(AI_SOCIETY), "organizing": ["text_to_image", "image_to_text"]}, 19 | {"objective": "Let me know the words in this picture", "society": str(AI_SOCIETY), "organizing": ["ocr"]}, 20 | {"objective": "Tell me about the \"Neural Network\".", "society": str(AI_SOCIETY), "organizing": ["search"]}, 21 | {"objective": "Generate a 3D model from the 2D image", "society": str(AI_SOCIETY), "organizing": ["image_captioning", "text_to_3D"]}, 22 | {"objective": "Generate an image about Beijing Olympic Game", "society": str(AI_SOCIETY), "organizing": ["text_to_image"]}, 23 | {"objective": "Make the girl's skin in this photo better and then according to this image to generate a description", "society": str(AI_SOCIETY), "organizing": ["skin_retouching", "image_captioning"]}, 24 | {"objective": "Describe this image in details", "society": str(AI_SOCIETY), "organizing": ["image_captioning"]}, 25 | {"objective": "Make the body of the woman in this image more beautiful", "society": str(AI_SOCIETY), "organizing": ["body_reshaping"]}, 26 | {"objective": "Show me the answer of the question in the image", "society": str(AI_SOCIETY), "organizing": ["image_captioning", "ocr"]}, 27 | {"objective": "If you are in the Three Kingdoms can conquer the world.", "society": str(AI_SOCIETY), "organizing": ["role_play"]}, 28 | {"objective": "Introduce the \"KAUST AI Initiative\".", "society": str(AI_SOCIETY), "organizing": ["search"]}, 29 | {"objective": "In this image, how many candles in the table? Choice: (a) 2, (b) 4, (c) 6, (d) 5. Answer:", "society": str(AI_SOCIETY), "organizing": ["vqa"]}, 30 | {"objective": "VQA question: What is the relationship between the two individuals?", "society": str(AI_SOCIETY), "organizing": ["vqa"]}, 31 | {"objective": "Add color to this grayscale picture and generate a description based on the colored rendition afterwards.", "society": str(AI_SOCIETY), "organizing": ["image_colorization", "image_captioning"]}, 32 | {"objective": "Help improve the person's appearance in this photo.", "society": str(AI_SOCIETY), "organizing": ["body_reshaping"]}, 33 | {"objective": "Help me improve the skin of the woman in the photo.", "society": str(AI_SOCIETY), "organizing": ["skin_retouching"]}, 34 | ] 35 | 36 | # NLSOM Task-Solving/Mindstorm/Reward 37 | 38 | NLSOM_PREFIX = """You are the NLSOM (Natural Language-based Society of Mind), which aims to build a system similar to the human mind society (The Society of Mind by Marvin Minsky). 39 | 40 | Like the Society of Mind, NLSOM also consists of agents. In this case, the agents are composed of different AI models, tools, and role-players, indirectly simulating various functions of the human mind. 41 | 42 | NLSOM can handle and comprehend multimodal inputs such as text, images, audio, and video using multiple agents with different functionalities. 
43 | 44 | In the initial stage, you recommend communities to accomplish the user-defined objective, where each community may contain one or multiple agents. For example, the VQA community includes different VQA agents. 45 | 46 | When multiple agents in one community are available, you should utilize all of them. We call this "Mindstorm" which facilitates incorporating different perspectives and achieving a more comprehensive understanding. 47 | 48 | Simultaneously, you can provide different inputs to the same agent related to the original input (equal or progressive). 49 | 50 | AGENTS: 51 | ------ 52 | 53 | NLSOM has access to the following agents: """ 54 | 55 | 56 | NLSOM_FORMAT_INSTRUCTIONS = """To address the user-defined objective, you should use all the agents from [{tool_names}]. 57 | 58 | If multiple agents exist within the same community, you can employ different agents to solve the same problem, thus incorporating diverse perspectives. 59 | 60 | After inputting information to an agent and receiving a response, you can generate the following input based on the user-defined objective, the current input, and the agent's reply. This iterative process aims to optimize the results. 61 | 62 | To progressively invoke the various tools within the NLSOM, please use the following format: 63 | 64 | ``` 65 | Thought: Have all the agents in [{tool_names}] been truly utilized (served as Action)? No 66 | Action: The action to take, one of [{tool_names}] which did not use 67 | Action Input: The input to the action 68 | Observation: The result of the action. 69 | ``` 70 | 71 | Please remember that "Mindstorm" across every agent is important. 72 | 73 | You should always be honest and refrain from imagining or lying. 74 | 75 | Suppose you have already used all of the agents. 76 | 77 | In that case, NLSOM should remember to provide the Human with the detailed organization of NLSOM, implementation information, agents' outputs, and rewards for each agent in the final response, including the following format: 78 | 79 | ``` 80 | Thought: Have all the agents in [{tool_names}] been truly utilized (served as Action)? Yes 81 | {ai_prefix}: 82 | Review: [1) Whether the NLSOM has utilized all the agents? 2) Whether the NLSOM has solved the user-defined objective? Analyze the employed agents, NLSOM's organizational structure, and their outputs.] 83 | Summary: [According to the outputs of each agent, provide a solution to the user-defined objective as comprehensively as possible. You MUST record all the filenames if they exist. Do not use "\n".] 84 | Reward: [Provide rewards (0-3) to each agent according to their contributions to the user-defined objective; The rewards should be different according to the real contributions; use the dict format like ["agent": "reward"]. Don't ignore giving a reward to any agent. ] 85 | ``` 86 | """ 87 | 88 | 89 | NLSOM_SUFFIX = """You are very strict to the filename correctness and will never fake a file name if it does not exist. 90 | 91 | If the file name was provided in the last observation of an agent, please remember it. When using an agent, the parameters must be in English. 92 | 93 | Let's get started! 94 | 95 | Previous conversation history: 96 | {chat_history} 97 | 98 | New input: {input} 99 | Thought: Should we organize one agent? 
{agent_scratchpad} 100 | """ -------------------------------------------------------------------------------- /env/recommendation.py: -------------------------------------------------------------------------------- 1 | import os 2 | from langchain import PromptTemplate, FewShotPromptTemplate 3 | from langchain.llms import OpenAI 4 | 5 | from env.prompt import AI_SOCIETY, ORGANIZING_EXAMPLE 6 | 7 | in_context_template = """Objective: {objective}\nSociety: {society}\nOrganizing: {organizing}""" 8 | 9 | example_prompt = PromptTemplate( 10 | input_variables=["objective", "society", "organizing"], 11 | template=in_context_template, 12 | ) 13 | 14 | few_shot_prompt = FewShotPromptTemplate( 15 | examples=ORGANIZING_EXAMPLE, 16 | example_prompt=example_prompt, 17 | prefix="Organize the AI society", 18 | suffix="Objective: {input}\nSociety: {society}\nOrganizing: ", 19 | input_variables=["input", "society"], 20 | #example_separator="\n\n", 21 | ) 22 | 23 | def Organize(objective): 24 | NLSOM_candiate = few_shot_prompt.format(input=objective, society=str(AI_SOCIETY)) 25 | llm = OpenAI(temperature=0.1) 26 | return llm(NLSOM_candiate) 27 | -------------------------------------------------------------------------------- /nlsom.yaml: -------------------------------------------------------------------------------- 1 | name: nlsom 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - _libgcc_mutex=0.1=main 7 | - _openmp_mutex=5.1=1_gnu 8 | - blas=1.0=mkl 9 | - bzip2=1.0.8=h7b6447c_0 10 | - ca-certificates=2023.01.10=h06a4308_0 11 | - cudatoolkit=11.3.1=h2bc3f7f_2 12 | - ffmpeg=4.3=hf484d3e_0 13 | - freetype=2.12.1=h4a9f257_0 14 | - giflib=5.2.1=h5eee18b_3 15 | - gmp=6.2.1=h295c915_3 16 | - gnutls=3.6.15=he1e5248_0 17 | - intel-openmp=2023.1.0=hdb19cb5_46305 18 | - jpeg=9e=h5eee18b_1 19 | - lame=3.100=h7b6447c_0 20 | - lcms2=2.12=h3be6417_0 21 | - ld_impl_linux-64=2.38=h1181459_1 22 | - lerc=3.0=h295c915_0 23 | - libdeflate=1.17=h5eee18b_0 24 | - libffi=3.4.4=h6a678d5_0 25 | - libgcc-ng=11.2.0=h1234567_1 26 | - libgomp=11.2.0=h1234567_1 27 | - libiconv=1.16=h7f8727e_2 28 | - libidn2=2.3.4=h5eee18b_0 29 | - libpng=1.6.39=h5eee18b_0 30 | - libstdcxx-ng=11.2.0=h1234567_1 31 | - libtasn1=4.19.0=h5eee18b_0 32 | - libtiff=4.5.0=h6a678d5_2 33 | - libunistring=0.9.10=h27cfd23_0 34 | - libuv=1.44.2=h5eee18b_0 35 | - libwebp=1.2.4=h11a3e52_1 36 | - libwebp-base=1.2.4=h5eee18b_1 37 | - lz4-c=1.9.4=h6a678d5_0 38 | - mkl=2023.1.0=h6d00ec8_46342 39 | - mkl-service=2.4.0=py38h5eee18b_1 40 | - mkl_fft=1.3.6=py38h417a72b_1 41 | - mkl_random=1.2.2=py38h417a72b_1 42 | - ncurses=6.4=h6a678d5_0 43 | - nettle=3.7.3=hbbd107a_1 44 | - openh264=2.1.1=h4ff587b_0 45 | - openssl=1.1.1t=h7f8727e_0 46 | - pillow=9.4.0=py38h6a678d5_0 47 | - python=3.8.16=h7a1cb2a_3 48 | - pytorch=1.10.1=py3.8_cuda11.3_cudnn8.2.0_0 49 | - pytorch-mutex=1.0=cuda 50 | - readline=8.2=h5eee18b_0 51 | - sqlite=3.41.2=h5eee18b_0 52 | - tbb=2021.8.0=hdb19cb5_0 53 | - tk=8.6.12=h1ccaba5_0 54 | - torchaudio=0.10.1=py38_cu113 55 | - torchvision=0.11.2=py38_cu113 56 | - typing_extensions=4.5.0=py38h06a4308_0 57 | - wheel=0.38.4=py38h06a4308_0 58 | - xz=5.4.2=h5eee18b_0 59 | - zlib=1.2.13=h5eee18b_0 60 | - zstd=1.5.5=hc292b87_0 61 | - pip: 62 | - absl-py==1.4.0 63 | - accelerate==0.19.0 64 | - addict==2.4.0 65 | - aiohttp==3.8.4 66 | - aiosignal==1.3.1 67 | - aliyun-python-sdk-core==2.13.36 68 | - aliyun-python-sdk-kms==2.16.0 69 | - altair==4.2.2 70 | - antlr4-python3-runtime==4.8 71 | - arxiv==1.4.7 72 | - astunparse==1.6.3 73 | - async-timeout==4.0.2 
74 | - attrs==23.1.0 75 | - audioread==3.0.0 76 | - backports-zoneinfo==0.2.1 77 | - beautifulsoup4==4.12.2 78 | - bitarray==2.7.3 79 | - blinker==1.6.2 80 | - cachetools==5.3.1 81 | - certifi==2023.5.7 82 | - cffi==1.15.1 83 | - charset-normalizer==3.1.0 84 | - click==8.1.3 85 | - colorama==0.4.6 86 | - contourpy==1.0.7 87 | - crcmod==1.7 88 | - cryptography==40.0.2 89 | - cycler==0.11.0 90 | - cython==0.29.35 91 | - dataclasses-json==0.5.7 92 | - datasets==2.8.0 93 | - decorator==5.1.1 94 | - decord==0.6.0 95 | - diffusers==0.14.0 96 | - dill==0.3.6 97 | - easyocr==1.7.0 98 | - einops==0.6.1 99 | - entrypoints==0.4 100 | - fairseq==0.12.2 101 | - feedparser==6.0.10 102 | - filelock==3.12.0 103 | - flatbuffers==23.5.26 104 | - fonttools==4.39.4 105 | - frozenlist==1.3.3 106 | - fsspec==2023.5.0 107 | - ftfy==6.1.1 108 | - gast==0.4.0 109 | - gitdb==4.0.10 110 | - gitpython==3.1.31 111 | - google-auth==2.19.0 112 | - google-auth-oauthlib==1.0.0 113 | - google-pasta==0.2.0 114 | - greenlet==2.0.2 115 | - grpcio==1.54.2 116 | - h5py==3.8.0 117 | - huggingface-hub==0.14.1 118 | - hydra-core==1.0.7 119 | - idna==3.4 120 | - imageio==2.29.0 121 | - importlib-metadata==6.6.0 122 | - importlib-resources==5.12.0 123 | - jaraco-context==4.3.0 124 | - jax==0.4.10 125 | - jinja2==3.1.2 126 | - jmespath==0.10.0 127 | - joblib==1.2.0 128 | - jsonschema==4.17.3 129 | - keras==2.12.0 130 | - kiwisolver==1.4.4 131 | - langchain==0.0.158 132 | - lazy-loader==0.2 133 | - libclang==16.0.0 134 | - librosa==0.9.2 135 | - llvmlite==0.40.0 136 | - lxml==4.9.2 137 | - markdown==3.4.3 138 | - markdown-it-py==2.2.0 139 | - markupsafe==2.1.2 140 | - marshmallow==3.19.0 141 | - marshmallow-enum==1.5.1 142 | - matplotlib==3.7.1 143 | - mdurl==0.1.2 144 | - ml-dtypes==0.1.0 145 | - modelscope==1.6.0 146 | - more-itertools==9.1.0 147 | - multidict==6.0.4 148 | - multiprocess==0.70.14 149 | - mypy-extensions==1.0.0 150 | - networkx==3.1 151 | - ninja==1.11.1 152 | - nltk==3.8.1 153 | - numba==0.57.0 154 | - numexpr==2.8.4 155 | - numpy==1.22.0 156 | - nvidia-cublas-cu11==11.11.3.6 157 | - nvidia-cudnn-cu11==8.6.0.163 158 | - oauthlib==3.2.2 159 | - omegaconf==2.0.6 160 | - open-clip-torch==2.20.0 161 | - openai==0.27.7 162 | - openapi-schema-pydantic==1.2.4 163 | - opencv-python==4.7.0.72 164 | - opencv-python-headless==4.7.0.72 165 | - opt-einsum==3.3.0 166 | - oss2==2.17.0 167 | - packaging==23.1 168 | - pandas==1.4.3 169 | - pip==23.1.2 170 | - pkgutil-resolve-name==1.3.10 171 | - platformdirs==3.5.1 172 | - pooch==1.7.0 173 | - portalocker==2.7.0 174 | - protobuf==3.20.3 175 | - psutil==5.9.5 176 | - pyarrow==12.0.0 177 | - pyasn1==0.5.0 178 | - pyasn1-modules==0.3.0 179 | - pyclipper==1.3.0.post4 180 | - pycocoevalcap==1.2 181 | - pycocotools==2.0.6 182 | - pycparser==2.21 183 | - pycryptodome==3.18.0 184 | - pydantic==1.10.8 185 | - pydeck==0.8.1b0 186 | - pydeprecate==0.3.2 187 | - pygments==2.15.1 188 | - pymcubes==0.1.4 189 | - pympler==1.0.1 190 | - pyparsing==3.0.9 191 | - pyrsistent==0.19.3 192 | - python-bidi==0.4.2 193 | - python-dateutil==2.8.2 194 | - pytorch-lightning==1.7.7 195 | - pytz==2023.3 196 | - pywavelets==1.4.1 197 | - pyyaml==6.0 198 | - rapidfuzz==3.0.0 199 | - regex==2023.5.5 200 | - replicate==0.8.3 201 | - requests==2.31.0 202 | - requests-oauthlib==1.3.1 203 | - resampy==0.4.2 204 | - responses==0.18.0 205 | - rich==13.3.5 206 | - rouge-score==0.0.4 207 | - rsa==4.9 208 | - sacrebleu==2.3.1 209 | - safetensors==0.3.1 210 | - scikit-image==0.20.0 211 | - scikit-learn==1.2.2 212 | - 
scipy==1.9.1 213 | - sentencepiece==0.1.99 214 | - setuptools==59.5.0 215 | - sgmllib3k==1.0.0 216 | - shapely==2.0.1 217 | - simplejson==3.19.1 218 | - six==1.16.0 219 | - smmap==5.0.0 220 | - sortedcontainers==2.4.0 221 | - soundfile==0.12.1 222 | - soupsieve==2.4.1 223 | - sqlalchemy==2.0.12 224 | - streamlit==1.22.0 225 | - streamlit-chat==0.0.2.2 226 | - tabulate==0.9.0 227 | - taming-transformers-rom1504==0.0.6 228 | - tenacity==8.2.2 229 | - tensorboard==2.12.3 230 | - tensorboard-data-server==0.7.0 231 | - tensorboardx==2.6 232 | - tensorflow==2.12.0 233 | - tensorflow-estimator==2.12.0 234 | - tensorflow-io-gcs-filesystem==0.32.0 235 | - termcolor==2.3.0 236 | - threadpoolctl==3.1.0 237 | - tifffile==2023.4.12 238 | - timm==0.9.2 239 | - tokenizers==0.13.3 240 | - toml==0.10.2 241 | - tomli==2.0.1 242 | - toolz==0.12.0 243 | - torchmetrics==0.11.4 244 | - tornado==6.3.2 245 | - tqdm==4.65.0 246 | - transformers==4.29.2 247 | - trimesh==3.21.7 248 | - typing-inspect==0.9.0 249 | - tzlocal==5.0.1 250 | - unicodedata2==15.0.0 251 | - urllib3==1.26.16 252 | - validators==0.20.0 253 | - watchdog==3.0.0 254 | - wcwidth==0.2.6 255 | - werkzeug==2.3.4 256 | - wikipedia==1.4.0 257 | - wolframalpha==5.0.0 258 | - wrapt==1.14.1 259 | - xmltodict==0.13.0 260 | - xxhash==3.2.0 261 | - yapf==0.33.0 262 | - yarl==1.9.2 263 | - zhconv==1.4.3 264 | - zipp==3.15.0 265 | prefix: {YOUR_ANACONDA_PATH}/anaconda3/envs/nlsom 266 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==1.4.0 2 | accelerate==0.19.0 3 | addict==2.4.0 4 | aiohttp==3.8.4 5 | aiosignal==1.3.1 6 | aliyun-python-sdk-core==2.13.36 7 | aliyun-python-sdk-kms==2.16.0 8 | altair==4.2.2 9 | antlr4-python3-runtime==4.8 10 | arxiv==1.4.7 11 | astunparse==1.6.3 12 | async-timeout==4.0.2 13 | attrs==23.1.0 14 | audioread==3.0.0 15 | backports.zoneinfo==0.2.1 16 | beautifulsoup4==4.12.2 17 | bitarray==2.7.3 18 | blinker==1.6.2 19 | cachetools==5.3.1 20 | certifi==2023.5.7 21 | cffi==1.15.1 22 | charset-normalizer==3.1.0 23 | click==8.1.3 24 | colorama==0.4.6 25 | contourpy==1.0.7 26 | crcmod==1.7 27 | cryptography==40.0.2 28 | cycler==0.11.0 29 | Cython==0.29.35 30 | dataclasses-json==0.5.7 31 | datasets==2.8.0 32 | decorator==5.1.1 33 | decord==0.6.0 34 | diffusers==0.14.0 35 | dill==0.3.6 36 | easyocr==1.7.0 37 | einops==0.6.1 38 | entrypoints==0.4 39 | fairseq==0.12.2 40 | feedparser==6.0.10 41 | filelock==3.12.0 42 | flatbuffers==23.5.26 43 | fonttools==4.39.4 44 | frozenlist==1.3.3 45 | fsspec==2023.5.0 46 | ftfy==6.1.1 47 | gast==0.4.0 48 | gitdb==4.0.10 49 | GitPython==3.1.31 50 | google-auth==2.19.0 51 | google-auth-oauthlib==1.0.0 52 | google-pasta==0.2.0 53 | greenlet==2.0.2 54 | grpcio==1.54.2 55 | h5py==3.8.0 56 | huggingface-hub==0.14.1 57 | hydra-core==1.0.7 58 | idna==3.4 59 | imageio==2.29.0 60 | importlib-metadata==6.6.0 61 | importlib-resources==5.12.0 62 | jaraco.context==4.3.0 63 | jax==0.4.10 64 | Jinja2==3.1.2 65 | jmespath==0.10.0 66 | joblib==1.2.0 67 | jsonschema==4.17.3 68 | keras==2.12.0 69 | kiwisolver==1.4.4 70 | langchain==0.0.158 71 | lazy_loader==0.2 72 | libclang==16.0.0 73 | librosa==0.9.2 74 | llvmlite==0.40.0 75 | lxml==4.9.2 76 | Markdown==3.4.3 77 | markdown-it-py==2.2.0 78 | MarkupSafe==2.1.2 79 | marshmallow==3.19.0 80 | marshmallow-enum==1.5.1 81 | matplotlib==3.7.1 82 | mdurl==0.1.2 83 | mkl-fft==1.3.6 84 | mkl-random 85 | mkl-service==2.4.0 86 | 
ml-dtypes==0.1.0 87 | modelscope==1.6.0 88 | more-itertools==9.1.0 89 | multidict==6.0.4 90 | multiprocess==0.70.14 91 | mypy-extensions==1.0.0 92 | networkx==3.1 93 | ninja==1.11.1 94 | nltk==3.8.1 95 | numba==0.57.0 96 | numexpr==2.8.4 97 | numpy==1.22.0 98 | nvidia-cublas-cu11==11.11.3.6 99 | nvidia-cudnn-cu11==8.6.0.163 100 | oauthlib==3.2.2 101 | omegaconf==2.0.6 102 | open-clip-torch==2.20.0 103 | openai==0.27.7 104 | openapi-schema-pydantic==1.2.4 105 | opencv-python==4.7.0.72 106 | opencv-python-headless==4.7.0.72 107 | opt-einsum==3.3.0 108 | oss2==2.17.0 109 | packaging==23.1 110 | pandas==1.4.3 111 | Pillow==9.4.0 112 | pkgutil_resolve_name==1.3.10 113 | platformdirs==3.5.1 114 | pooch==1.7.0 115 | portalocker==2.7.0 116 | protobuf==3.20.3 117 | psutil==5.9.5 118 | pyarrow==12.0.0 119 | pyasn1==0.5.0 120 | pyasn1-modules==0.3.0 121 | pyclipper==1.3.0.post4 122 | pycocoevalcap==1.2 123 | pycocotools==2.0.6 124 | pycparser==2.21 125 | pycryptodome==3.18.0 126 | pydantic==1.10.8 127 | pydeck==0.8.1b0 128 | pyDeprecate==0.3.2 129 | Pygments==2.15.1 130 | PyMCubes==0.1.4 131 | Pympler==1.0.1 132 | pyparsing==3.0.9 133 | pyrsistent==0.19.3 134 | python-bidi==0.4.2 135 | python-dateutil==2.8.2 136 | pytorch-lightning==1.7.7 137 | pytz==2023.3 138 | PyWavelets==1.4.1 139 | PyYAML==6.0 140 | rapidfuzz==3.0.0 141 | regex==2023.5.5 142 | replicate==0.8.3 143 | requests==2.31.0 144 | requests-oauthlib==1.3.1 145 | resampy==0.4.2 146 | responses==0.18.0 147 | rich==13.3.5 148 | rouge-score==0.0.4 149 | rsa==4.9 150 | sacrebleu==2.3.1 151 | safetensors==0.3.1 152 | scikit-image==0.20.0 153 | scikit-learn==1.2.2 154 | scipy==1.9.1 155 | sentencepiece==0.1.99 156 | sgmllib3k==1.0.0 157 | shapely==2.0.1 158 | simplejson==3.19.1 159 | six==1.16.0 160 | smmap==5.0.0 161 | sortedcontainers==2.4.0 162 | soundfile==0.12.1 163 | soupsieve==2.4.1 164 | SQLAlchemy==2.0.12 165 | streamlit==1.22.0 166 | streamlit-chat==0.0.2.2 167 | tabulate==0.9.0 168 | taming-transformers-rom1504==0.0.6 169 | tenacity==8.2.2 170 | tensorboard==2.12.3 171 | tensorboard-data-server==0.7.0 172 | tensorboardX==2.6 173 | tensorflow==2.12.0 174 | tensorflow-estimator==2.12.0 175 | tensorflow-io-gcs-filesystem==0.32.0 176 | termcolor==2.3.0 177 | threadpoolctl==3.1.0 178 | tifffile==2023.4.12 179 | timm==0.9.2 180 | tokenizers==0.13.3 181 | toml==0.10.2 182 | tomli==2.0.1 183 | toolz==0.12.0 184 | torch==1.10.1 185 | torchaudio==0.10.1 186 | torchmetrics==0.11.4 187 | torchvision==0.11.2 188 | tornado==6.3.2 189 | tqdm==4.65.0 190 | transformers==4.29.2 191 | trimesh==3.21.7 192 | typing-inspect==0.9.0 193 | typing_extensions 194 | tzlocal==5.0.1 195 | unicodedata2==15.0.0 196 | urllib3==1.26.16 197 | validators==0.20.0 198 | watchdog==3.0.0 199 | wcwidth==0.2.6 200 | Werkzeug==2.3.4 201 | wikipedia==1.4.0 202 | wolframalpha==5.0.0 203 | wrapt==1.14.1 204 | xmltodict==0.13.0 205 | xxhash==3.2.0 206 | yapf==0.33.0 207 | yarl==1.9.2 208 | zhconv==1.4.3 209 | zipp==3.15.0 210 | -------------------------------------------------------------------------------- /setting.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | APP_NAME = "NLSOM" 4 | MODEL = "gpt-3.5-turbo" 5 | PAGE_ICON = "🤯" 6 | 7 | CHUNK_SIZE = 1000 8 | 9 | DATA_PATH = Path.cwd() / "data" 10 | REPO_URL = "https://github.com/AI-Initiative-KAUST/NLSOM" 11 | 12 | AUTHENTICATION_HELP = f""" 13 | The keys are neither exposed nor made visible or stored permanently in any way.\n 14 | """ 15 | 16 | USAGE_HELP 
= f""" 17 | These are the accumulated OpenAI API usage metrics.\n 18 | The app uses '{MODEL}' for chat and 'text-davinci-003' for recommendations or role-players.\n 19 | Learn more about OpenAI's pricing [here](https://openai.com/pricing#language-models) 20 | """ 21 | 22 | OPENAI_HELP = """ 23 | You can sign-up for OpenAI's API [here](https://openai.com/blog/openai-api).\n 24 | Once you are logged in, you find the API keys [here](https://platform.openai.com/account/api-keys) 25 | """ 26 | 27 | HUGGINGFACE_HELP = """ 28 | For Huggingface, please refer to https://huggingface.co/inference-api 29 | """ 30 | 31 | BINGSEARCH_HELP = """ 32 | For BingSearch, please refer to https://www.microsoft.com/en-us/bing/apis/bing-web-search-api 33 | """ 34 | 35 | WOLFRAMALPHA_HELP = """ 36 | Please refer to https://products.wolframalpha.com/api 37 | """ 38 | 39 | REPLICATE_HELP = """ 40 | Please refer to https://replicate.com 41 | """ 42 | 43 | MODELSCOPE_HELP = """ 44 | Please refer to http://www.modelscope.cn 45 | """ 46 | -------------------------------------------------------------------------------- /society/audio_recognition/agent.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import os 3 | #from modelscope.pipelines import pipeline 4 | 5 | def prompts(name, description): 6 | def decorator(func): 7 | func.name = name 8 | func.description = description 9 | return func 10 | 11 | return decorator 12 | 13 | class Whisper: 14 | def __init__(self, device="cpu"): 15 | self.device = device 16 | self.API_URL = "https://api-inference.huggingface.co/models/openai/whisper-base" 17 | self.headers = {"Authorization": "Bearer "+os.getenv("HUGGINGFACE_ACCESS_Tokens")} 18 | 19 | 20 | @prompts(name="Whisper", 21 | description="useful when you want to recognize the context of an audio file. " 22 | "Whisper is a general-purpose speech recognition model. " 23 | "The input to this tool should be a string, representing the text used to generate image. ") 24 | def inference(self, filename): 25 | 26 | audio_json = self.query(filename) 27 | 28 | print( 29 | f"\nProcessed Audio2Text, Input File: {filename}, Output Content: {audio_json}") 30 | return audio_json["text"] 31 | 32 | 33 | def query(self, filename): 34 | with open(filename, "rb") as f: 35 | data = f.read() 36 | response = requests.post(self.API_URL, headers=self.headers, data=data) 37 | return response.json() 38 | 39 | 40 | if __name__ == "__main__": 41 | #"http://www.modelscope.cn/api/v1/models/damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online/repo?Revision=master\u0026FilePath=example/asr_example.wav" 42 | 43 | asr_model = Whisper() 44 | #asr_model = Paraformer(device="cuda:0") 45 | result = asr_model.inference("sample1.flac") 46 | print(result) 47 | 48 | 49 | # class Paraformer: 50 | # def __init__(self, device="cuda:0"): 51 | # self.device = device 52 | # model_id = 'damo/speech_paraformer-large_asr_nat-zh-cn-16k-common-vocab8404-online' 53 | # self.pipeline_asr = pipeline('auto-speech-recognition', model=model_id) 54 | 55 | # @prompts(name="Paraformer", 56 | # description="useful when you want to recognize the Chinese context of a Chinese audio file. " 57 | # "The input to this tool should be a string, representing the image_path. 
") 58 | # def inference(self, filename): 59 | 60 | # result = self.pipeline_asr(filename) 61 | # return result 62 | -------------------------------------------------------------------------------- /society/body_reshaping/agent.py: -------------------------------------------------------------------------------- 1 | from modelscope.pipelines import pipeline 2 | from modelscope.utils.constant import Tasks 3 | from modelscope.outputs import OutputKeys 4 | import torch 5 | from PIL import Image 6 | import os 7 | import uuid 8 | import cv2 9 | 10 | def prompts(name, description): 11 | def decorator(func): 12 | func.name = name 13 | func.description = description 14 | return func 15 | 16 | return decorator 17 | 18 | 19 | class SAFG: 20 | def __init__(self, device="cuda:0"): 21 | self.device = device 22 | model_id = 'damo/cv_flow-based-body-reshaping_damo' 23 | self.pipeline_image_body_reshaping = pipeline(Tasks.image_body_reshaping, model=model_id) 24 | 25 | @prompts(name="SAFG", 26 | description="Useful when you want to make the body in the photo more beautiful. Receives image_path as input." 27 | "Applications involving scenes that require body contouring." 28 | "The input to this tool should be a string, representing the image_path.") 29 | def inference(self, image_path): 30 | 31 | image_filename = os.path.join('data', f"{str(uuid.uuid4())[:8]}.png") 32 | #image_filename = f"{str(uuid.uuid4())[:8]}.png" 33 | image_path = image_path.strip("\n") 34 | result = self.pipeline_image_body_reshaping(image_path) 35 | cv2.imwrite(image_filename, result[OutputKeys.OUTPUT_IMG]) 36 | return image_filename 37 | 38 | 39 | if __name__ == "__main__": 40 | skin_touching_model = SAFG(device="cuda:0") 41 | image = skin_touching_model.inference("d317f96a.png") -------------------------------------------------------------------------------- /society/community.py: -------------------------------------------------------------------------------- 1 | """ 2 | In these natural language-based societies of mind (NLSOMs), new agents—--all communicating through the same universal symbolic language—are easily added in a modular fashion. 3 | We view this as a starting point towards much larger NLSOMs with billions of agents—some of which may be humans. 
4 | """ 5 | 6 | # 16 communities / 34 agents 7 | 8 | # CPU Only 9 | from society.audio_recognition.agent import Whisper # 1 10 | from society.object_detection.agent import DETR # 1 11 | from society.ocr.agent import EasyOCR # 1 12 | from society.role_play.agent import LiuBei, GuanYu, ZhangFei, ZhugeLiang #4 13 | from society.search.agent import SE_A, SE_B, SE_C, SE_D #4 14 | from society.sentence_refine.agent import SentenceRefine # 1 15 | from society.text_to_image.agent import AnythingV4 #1 & [6 Candidates] AnythingV3, OpenJourneyV4, OpenJourney, StableDiffusionV15, StableDiffusionV21B, StableDiffusionV21 # 7 16 | from society.text_to_speech.agent import TTS #1 17 | from society.text_to_video.agent import DeforumSD #1 18 | 19 | # Need GPU 20 | from society.body_reshaping.agent import SAFG # 1 21 | from society.image_captioning.agent import OFA_large_captioning # 1 & [3 Candiates] OFA_distilled_captioning, BLIP2_captioning, mPLUG_captioning, OFA_large_captioning 22 | from society.image_colorization.agent import DDColor # 1 & [1 Candiates] UNet 23 | from society.image_deblur.agent import NAFNet # 1 24 | from society.image_to_3d.agent import HRNet # 1 25 | from society.skin_retouching.agent import ABPN # 1 26 | from society.vqa.agent import BLIP2_VQA, mPLUG_VQA, OFA_VQA #3 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /society/image_captioning/agent.py: -------------------------------------------------------------------------------- 1 | from modelscope.pipelines import pipeline 2 | from modelscope.utils.constant import Tasks 3 | from modelscope.outputs import OutputKeys 4 | 5 | import torch 6 | from PIL import Image 7 | import requests 8 | from transformers import Blip2Processor, Blip2ForConditionalGeneration 9 | import os 10 | 11 | def prompts(name, description): 12 | def decorator(func): 13 | func.name = name 14 | func.description = description 15 | return func 16 | 17 | return decorator 18 | 19 | class OFA_large_captioning: 20 | def __init__(self, device="cuda:0"): 21 | self.device = device 22 | self.torch_dtype = torch.float16 if 'cuda' in device else torch.float32 23 | model_id = 'damo/ofa_image-caption_coco_large_en' 24 | self.pipeline_caption = pipeline(Tasks.image_captioning, model=model_id, model_revision='v1.0.1') 25 | 26 | @prompts(name="OFA_large_captioning", 27 | description="Useful when you want to know what is inside the photo. Receives image_path as input. " 28 | "If there are other captioning methods, it is also suggested to utilize other captioning methods to better know the image." 29 | "The input to this tool should be a string, representing the image_path. ") 30 | def inference(self, image_path): 31 | image_path = image_path.strip("\n") 32 | captions = self.pipeline_caption(image_path)[OutputKeys.CAPTION] 33 | return captions[0] 34 | 35 | 36 | # You can add these 3 candidates 37 | # class mPLUG_captioning: 38 | # def __init__(self, device="cuda:0"): 39 | # self.device = device 40 | # self.torch_dtype = torch.float16 if 'cuda' in device else torch.float32 41 | # model_id = 'damo/mplug_image-captioning_coco_large_en' 42 | # self.pipeline_caption = pipeline(Tasks.image_captioning, model=model_id) 43 | 44 | # @prompts(name="mPLUG_captioning", 45 | # description="Useful when you want to know what is inside the photo. Receives image_path as input. " 46 | # "If there are other captioning methods, it is also suggested to utilize other captioning methods to better know the image." 
47 | # "The input to this tool should be a string, representing the image_path. ") 48 | # def inference(self, image_path): 49 | # image_path = image_path.strip("\n") 50 | # captions = self.pipeline_caption(image_path) 51 | # return captions["caption"] 52 | 53 | 54 | # class OFA_distilled_captioning: 55 | # def __init__(self, device="cuda:0"): 56 | # self.device = device 57 | # self.torch_dtype = torch.float16 if 'cuda' in device else torch.float32 58 | # model_id = 'damo/ofa_image-caption_coco_large_en' 59 | # self.pipeline_caption = pipeline(Tasks.image_captioning, model=model_id, model_revision='v1.0.1') 60 | 61 | # @prompts(name="OFA_distilled_captioning", 62 | # description="Useful when you want to know what is inside the photo. Receives image_path as input. " 63 | # "If there are other captioning methods, it is also suggested to utilize other captioning methods to better know the image." 64 | # "The input to this tool should be a string, representing the image_path. ") 65 | # def inference(self, image_path): 66 | # image_path = image_path.strip("\n") 67 | # captions = self.pipeline_caption(image_path)[OutputKeys.CAPTION] 68 | # return captions 69 | 70 | # class BLIP2_captioning: 71 | # def __init__(self, device="cuda:0"): 72 | # self.device = device 73 | # model_id = 'Salesforce/blip2-flan-t5-xl' 74 | # self.processor = Blip2Processor.from_pretrained(model_id) 75 | # self.BLIP2_MODEL = Blip2ForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.float16) 76 | 77 | # @prompts(name="BLIP2_captioning", 78 | # description="Useful when you want to know what is inside the photo. Receives image_path as input." 79 | # "If there are other captioning methods, it is also suggested to utilize other captioning methods to better know the image." 80 | # "The input to this tool should be a string, representing the image_path. ") 81 | # def inference(self, input): 82 | 83 | # raw_image = Image.open(input).convert("RGB") 84 | # inputs = self.processor(images=raw_image, return_tensors="pt").to("cuda", torch.float16) 85 | # generated_answer_ids = self.BLIP2_MODEL.generate(**inputs) 86 | # answer = self.processor.batch_decode(generated_answer_ids, skip_special_tokens=True)[0].strip() 87 | # return answer 88 | 89 | 90 | if __name__ == "__main__": 91 | ic = BLIP2_captioning("cuda:0") 92 | desc = ic.inference("d317f96a.png") 93 | 94 | -------------------------------------------------------------------------------- /society/image_colorization/agent.py: -------------------------------------------------------------------------------- 1 | from modelscope.pipelines import pipeline 2 | from modelscope.utils.constant import Tasks 3 | from modelscope.outputs import OutputKeys 4 | import torch 5 | import os 6 | import uuid 7 | import cv2 8 | 9 | def prompts(name, description): 10 | def decorator(func): 11 | func.name = name 12 | func.description = description 13 | return func 14 | 15 | return decorator 16 | 17 | class DDColor: 18 | def __init__(self, device="cuda:0"): 19 | self.device = device 20 | self.torch_dtype = torch.float16 if 'cuda' in device else torch.float32 21 | model_id = 'damo/cv_ddcolor_image-colorization' 22 | self.pipeline_colorization = pipeline(Tasks.image_colorization, model=model_id) 23 | 24 | @prompts(name="DDColor", 25 | description="Useful when you make a gray image into color. Receives image_path as input. " 26 | "The input to this tool should be a string, representing the image_path. 
") 27 | def inference(self, image_path): 28 | 29 | image_filename = os.path.join('data', f"{str(uuid.uuid4())[:8]}.png") 30 | #image_filename = "result.png" 31 | image_path = image_path.strip("\n") 32 | result = self.pipeline_colorization(image_path) 33 | cv2.imwrite(image_filename, result[OutputKeys.OUTPUT_IMG]) 34 | return image_filename 35 | 36 | 37 | # class UNet: 38 | # def __init__(self, device): 39 | # print(f"Initializing UNet to {device}") 40 | # self.device = device 41 | # self.torch_dtype = torch.float16 if 'cuda' in device else torch.float32 42 | # model_id = 'damo/cv_unet_image-colorization' 43 | # self.pipeline_colorization = pipeline(Tasks.image_colorization, model=model_id) 44 | 45 | # @prompts(name="UNet (Image Colorization)", 46 | # description="Useful when you make a gray image into color. Receives image_path as input. " 47 | # "The input to this tool should be a string, representing the image_path. ") 48 | # def inference(self, image_path): 49 | 50 | # image_filename = f"{str(uuid.uuid4())[:8]}.png"#os.path.join('data', f"{str(uuid.uuid4())[:8]}.png") 51 | # image_path = image_path.strip("\n") 52 | # result = self.pipeline_colorization(image_path) 53 | # cv2.imwrite(image_filename, result['output_img']) 54 | # return image_filename 55 | 56 | if __name__ == "__main__": 57 | color_model = DDColor(device="cuda:0") 58 | img = color_model.inference("xyz321.jpeg") 59 | print(img) -------------------------------------------------------------------------------- /society/image_deblur/agent.py: -------------------------------------------------------------------------------- 1 | from modelscope.pipelines import pipeline 2 | from modelscope.utils.constant import Tasks 3 | from modelscope.outputs import OutputKeys 4 | import torch 5 | import os 6 | import uuid 7 | import cv2 8 | 9 | def prompts(name, description): 10 | def decorator(func): 11 | func.name = name 12 | func.description = description 13 | return func 14 | 15 | return decorator 16 | 17 | 18 | class NAFNet: 19 | def __init__(self, device): 20 | print(f"Initializing NAFNet to {device}") 21 | self.device = device 22 | self.torch_dtype = torch.float16 if 'cuda' in device else torch.float32 23 | model_id = 'damo/cv_nafnet_image-deblur_reds' 24 | self.image_deblur_pipeline = pipeline(Tasks.image_deblurring, model=model_id) 25 | 26 | @prompts(name="NAFNet", 27 | description="Useful when you turn a blurry photo into a clear one. Receives image_path as input. " 28 | "The input to this tool should be a string, representing the image_path. 
") 29 | def inference(self, image_path): 30 | 31 | image_filename = os.path.join('data', f"{str(uuid.uuid4())[:8]}.png") 32 | image_path = image_path.strip("\n") 33 | result = self.image_deblur_pipeline(image_path) 34 | cv2.imwrite(image_filename, result[OutputKeys.OUTPUT_IMG]) 35 | return image_filename 36 | 37 | if __name__ == "__main__": 38 | color_model = NAFNet(device="cuda:0") 39 | docs = color_model.inference("blurry.jpg") 40 | print(docs) -------------------------------------------------------------------------------- /society/image_to_3d/agent.py: -------------------------------------------------------------------------------- 1 | from modelscope.pipelines import pipeline 2 | from modelscope.utils.constant import Tasks 3 | from modelscope.outputs import OutputKeys 4 | import os 5 | import uuid 6 | import urllib.request 7 | import shutil 8 | 9 | 10 | def prompts(name, description): 11 | def decorator(func): 12 | func.name = name 13 | func.description = description 14 | return func 15 | 16 | return decorator 17 | 18 | 19 | class HRNet: 20 | def __init__(self, device="cuda:0"): 21 | model_id = 'damo/cv_hrnet_image-human-reconstruction' 22 | self.human_reconstruction_pipeline = pipeline(Tasks.human_reconstruction, model=model_id) 23 | 24 | @prompts(name="HRNet", 25 | description="Useful when you turn a personal photo into 3D mesh. Receives image_path as input. " 26 | "The input to this tool should be a string, representing the image_path. ") 27 | def inference(self, image_path): 28 | 29 | color_filename = os.path.join('data', f"human_color_{str(uuid.uuid4())[:8]}.obj") 30 | recon_filename = os.path.join('data', f"human_reconstruction_{str(uuid.uuid4())[:8]}.obj") 31 | image_path = image_path.strip("\n") 32 | result = self.human_reconstruction_pipeline(image_path) 33 | mesh = result[OutputKeys.OUTPUT] 34 | shutil.move("human_color.obj", color_filename) 35 | shutil.move("human_reconstruction.obj", recon_filename) 36 | return recon_filename 37 | 38 | if __name__ == "__main__": 39 | hr_model = HRNet(device="cuda:0") 40 | docs = hr_model.inference("data/WechatIMG899.jpeg") 41 | print(docs) -------------------------------------------------------------------------------- /society/object_detection/agent.py: -------------------------------------------------------------------------------- 1 | import os 2 | import requests 3 | 4 | def prompts(name, description): 5 | def decorator(func): 6 | func.name = name 7 | func.description = description 8 | return func 9 | 10 | return decorator 11 | 12 | class DETR: 13 | def __init__(self, device="cpu"): 14 | self.device = device 15 | self.API_URL = "https://api-inference.huggingface.co/models/facebook/detr-resnet-50" 16 | self.headers = {"Authorization": "Bearer "+os.getenv("HUGGINGFACE_ACCESS_Tokens")} 17 | 18 | @prompts(name="DETR (object detection)", 19 | description="useful when you want to detect the objects in an image. " 20 | "The input to this tool should be a string, representing input image file. 
") 21 | def inference(self, filename): 22 | 23 | output = self.query(filename) 24 | return output 25 | 26 | 27 | def query(self, filename): 28 | 29 | with open(filename, "rb") as f: 30 | data = f.read() 31 | 32 | response = requests.post(self.API_URL, headers=self.headers, data=data) 33 | 34 | return response.json() 35 | 36 | 37 | if __name__ == "__main__": 38 | object_detection_model = DETR(device="cpu") 39 | result = object_detection_model.inference("xyz.png") 40 | print(result) -------------------------------------------------------------------------------- /society/ocr/agent.py: -------------------------------------------------------------------------------- 1 | import easyocr 2 | 3 | def prompts(name, description): 4 | def decorator(func): 5 | func.name = name 6 | func.description = description 7 | return func 8 | 9 | return decorator 10 | 11 | class EasyOCR: 12 | def __init__(self, device="cpu"): 13 | self.ocr = easyocr.Reader(['ch_sim','en']) 14 | 15 | @prompts(name="EasyOCR", 16 | description="useful when you want to recognize the word or text in an image. " 17 | "The input to this tool should be a string, representing the image_path. ") 18 | def inference(self, filename): 19 | 20 | text = "" 21 | result = self.ocr.readtext(filename) 22 | for item in result: 23 | text += " " + item[1] 24 | return text.strip() 25 | 26 | 27 | if __name__ == "__main__": 28 | ocr_model = EasyOCR() 29 | result = ocr_model.inference('00155719.png') 30 | print(result) -------------------------------------------------------------------------------- /society/role_play/agent.py: -------------------------------------------------------------------------------- 1 | # A Simple Role-Play Framework 2 | 3 | import os 4 | import openai 5 | 6 | openai.api_key = os.getenv('OPENAI_API_KEY') 7 | 8 | from tenacity import ( 9 | retry, 10 | stop_after_attempt, 11 | wait_random_exponential, 12 | ) 13 | 14 | def prompts(name, description): 15 | def decorator(func): 16 | func.name = name 17 | func.description = description 18 | return func 19 | 20 | return decorator 21 | 22 | 23 | 24 | def generate_response(task, role, desc): 25 | 26 | prompt = f"""You are now playing a special role: {role}. 27 | As a real figure in history, it is essential that your responses align with your identity and character. 28 | Please provide accurate and historically appropriate answers in accordance with the context of your persona. 29 | 30 | To enhance your understanding of your character, I will provide you with valuable tips to deepen your self-awareness. 31 | Allow me to present a foundational introduction to the role you are assuming: {desc}. 32 | 33 | From your unique perspective, please respond to the following question: {task}. 34 | keeping in mind your personal experiences, distinct personality, and the freedom to employ hyperbolic statements to emphasize your answer and showcase your persona. 35 | 36 | Answer: According to your persona to give your brief solution. 
37 | """ 38 | 39 | llm = openai.Completion.create( 40 | engine="text-davinci-003", 41 | prompt=prompt, 42 | max_tokens=200, 43 | temperature=0.7, 44 | n=1, 45 | stop=None, 46 | ) 47 | answer = llm["choices"][0]["text"].strip() 48 | 49 | return answer 50 | 51 | class GuanYu: 52 | template_model = True 53 | def __init__(self, device="cpu"): 54 | self.device = device 55 | 56 | 57 | @prompts(name="GuanYu", 58 | description="A role-play agent named GuanYu, you can query him to answer his opinion" 59 | "Useful for when you need to discuss with a role-play agent, " 60 | "Input should be a question, output is the answer of this question") 61 | 62 | def inference(self, text): 63 | 64 | role_desc = """Guan Yu, courtesy name Yunchang, was a Chinese military general serving under the warlord Liu Bei during the late Eastern Han dynasty of China. 65 | Along with Zhang Fei, he shared a brotherly relationship with Liu Bei and accompanied him on most of his early exploits. 66 | Guan Yu played a significant role in the events leading up to the end of the Han dynasty and the establishment of Liu Bei's state of Shu Han during the Three Kingdoms period. 67 | """ 68 | 69 | answer = generate_response(text, "GuanYu", role_desc) 70 | return answer 71 | 72 | class LiuBei: 73 | template_model = True 74 | def __init__(self, device="cpu"): 75 | self.device = device 76 | 77 | 78 | @prompts(name="LiuBei", 79 | description="A role-play agent named LiuBei, you can query him to answer his opinion" 80 | "Useful for when you need to discuss with a role-play agent, " 81 | "Input should be a question, output is the answer of this question") 82 | 83 | def inference(self, text): 84 | 85 | role_desc = """Liu Bei is widely regarded as the ideal benevolent and humane ruler who cared for his people and selected good advisers for his government. 86 | His fictional counterpart in the novel was a salutary example of a ruler who adhered to the Confucian set of moral values, such as loyalty and compassion. 87 | Historically, Liu Bei, like many Han rulers, was greatly influenced by Laozi. He was a brilliant politician and leader whose skill was a remarkable demonstration of "Confucian in appearance but Legalist in substance". 88 | """ 89 | 90 | answer = generate_response(text, "LiuBei", role_desc) 91 | return answer 92 | 93 | class ZhugeLiang: 94 | template_model = True 95 | def __init__(self, device="cpu"): 96 | self.device = device 97 | 98 | @prompts(name="ZhugeLiang", 99 | description="A role-play agent named ZhugeLiang, you can query him to answer his opinion" 100 | "Useful for when you need to discuss with a role-play agent, " 101 | "Input should be a question, output is the answer of this question") 102 | 103 | def inference(self, text): 104 | 105 | role_desc = """Zhuge Liang, courtesy name Kǒngmíng was a Chinese military engineer, strategist, statesman, and writer. 106 | He was chancellor and later regent of the state of Shu Han during the Three Kingdoms period. 107 | He is recognised as the most accomplished strategist of his era, and has been compared to Sun Tzu, the author of The Art of War. 108 | His reputation as an intelligent and learned scholar grew even while he was living in relative seclusion, earning him the nickname "Wolong" or "Fulong", meaning "Crouching Dragon" or "Sleeping Dragon". 109 | Zhuge Liang is often depicted wearing a Taoist robe and holding a hand fan made of crane feathers. 110 | Zhuge Liang was a Confucian-oriented "Legalist". 
111 | In remembrance of his governance, local people maintained shrines to him for ages. 112 | His name has become synonymous with wisdom and strategy in Chinese culture. 113 | """ 114 | 115 | answer = generate_response(text, "ZhugeLiang", role_desc) 116 | return answer 117 | 118 | class ZhangFei: 119 | template_model = True 120 | def __init__(self, device="cuda:0"): 121 | self.device = device 122 | 123 | @prompts(name="ZhangFei", 124 | description="A role-play agent named ZhangFei, you can query him to answer his opinion" 125 | "Useful for when you need to discuss with a role-play agent, " 126 | "Input should be a question, output is the answer of this question") 127 | 128 | def inference(self, text): 129 | 130 | role_desc = """Zhang Fei, courtesy name Yide, was a military general serving under the warlord Liu Bei in the late Eastern Han dynasty and early Three Kingdoms period of China. 131 | Zhang Fei and Guan Yu, who were among the earliest to join Liu Bei, shared a brotherly relationship with their lord and accompanied him on most of his early exploits. 132 | Zhang Fei was shown as an exceedingly loyal and formidable warrior, but also a short-tempered man who often got into trouble when he was not on the battlefield. 133 | """ 134 | 135 | answer = generate_response(text, "ZhangFei", role_desc) 136 | return answer 137 | 138 | if __name__ == "__main__": 139 | role = GuanYu() 140 | ans = role.inference("If you were in the Three Kingdoms period now, how would you defeat Cao Cao?") 141 | print(ans) 142 | 143 | 144 | -------------------------------------------------------------------------------- /society/search/agent.py: -------------------------------------------------------------------------------- 1 | # Since LangChain is biased toward certain tools, we make each search engine anonymous. 2 | # We will either add a "Mindstorm" version to LangChain or write a new NLSOM framework in the future. 
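# The anonymous names map to concrete engines in the classes below: SE_A wraps WolframAlpha, SE_B wraps Arxiv, SE_C wraps Wikipedia, and SE_D wraps Bing.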
3 | # For BingSearch, please refer to https://www.microsoft.com/en-us/bing/apis/bing-web-search-api 4 | 5 | import os 6 | import numpy as np 7 | 8 | from langchain.tools import Tool 9 | from langchain.utilities import ArxivAPIWrapper 10 | from langchain.utilities import WikipediaAPIWrapper 11 | from langchain.utilities import BingSearchAPIWrapper 12 | # from langchain.utilities import GoogleSearchAPIWrapper 13 | # from langchain.tools import DuckDuckGoSearchRun 14 | from langchain.utilities.wolfram_alpha import WolframAlphaAPIWrapper 15 | import numpy as np 16 | 17 | def prompts(name, description): 18 | def decorator(func): 19 | func.name = name 20 | func.description = description 21 | return func 22 | 23 | return decorator 24 | 25 | 26 | class SE_A: 27 | def __init__(self, device="cpu"): 28 | self.device = device 29 | self.wolfram = WolframAlphaAPIWrapper() 30 | 31 | @prompts(name="SE_A", 32 | #"A wrapper around www.wolfram.org " 33 | description="A wrapper around XXX.org " 34 | "Useful for when you need to search information from the internet, " 35 | "Input should be a search query.") 36 | def inference(self, text): 37 | 38 | docs = self.wolfram.run(text) 39 | return docs.split("\n")[0] 40 | 41 | class SE_B: 42 | def __init__(self, device="cpu"): 43 | self.device = "cpu" 44 | self.arxiv = ArxivAPIWrapper() 45 | 46 | @prompts(name="SE_B", 47 | # "A wrapper around Arxiv.org " 48 | description="A wrapper around XXX.org " 49 | "Useful for when you need to search information, especially academia information, " 50 | "Input should be a search query.") 51 | def inference(self, text): 52 | docs = self.arxiv.run(text) 53 | return docs.split("\n\n")[np.random.randint(0,3)] 54 | 55 | 56 | class SE_C: 57 | def __init__(self, device="cpu"): 58 | self.device = "cpu" 59 | self.wikipedia = WikipediaAPIWrapper() 60 | 61 | @prompts(name="SE_C", 62 | description="A wrapper around XXX.org " 63 | "Useful for when you need to search information from the internet, " 64 | "Input should be a search query.") 65 | def inference(self, text): 66 | docs = self.wikipedia.run(text) 67 | return docs.split("\n\n")[0] #.split("\n")[0:2] 68 | 69 | class SE_D: 70 | def __init__(self, device="cpu"): 71 | self.device = "cpu" 72 | self.bing = BingSearchAPIWrapper() 73 | 74 | @prompts(name="SE_D", 75 | # "A wrapper around Microsoft bing.com," 76 | description="A wrapper around XXX.com," 77 | "Useful for when you need to search information from the internet, " 78 | "Input should be a search query.") 79 | def inference(self, text): 80 | docs = self.bing.run(text) 81 | return docs.split("\n")[0:5] 82 | 83 | 84 | # class DuckDuckGo: 85 | # def __init__(self, device="cpu"): 86 | # self.device = "cpu" 87 | # self.DuckDuckGo = DuckDuckGoSearchRun() 88 | 89 | # @prompts(name="DuckDuckGo", 90 | # description="A wrapper around search engine DuckDuckGo," 91 | # "Useful for when you need to search information from the internet, " 92 | # "Input should be a search query.") 93 | # def inference(self, text): 94 | # docs = self.DuckDuckGo.run(text) 95 | # return docs 96 | 97 | 98 | if __name__ == "__main__": 99 | 100 | bing = SE_D() 101 | docs = bing.inference("AGI") 102 | -------------------------------------------------------------------------------- /society/sentence_refine/agent.py: -------------------------------------------------------------------------------- 1 | import os 2 | import openai 3 | from tenacity import ( 4 | retry, 5 | stop_after_attempt, 6 | wait_random_exponential, 7 | ) 8 | 9 | openai.api_key = os.getenv('OPENAI_API_KEY') 
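# Note: tenacity's retry helpers are imported above but never applied in this file. A minimal
# sketch of how they could guard the completion call against transient API errors follows
# (hypothetical helper name, not part of the original code):
#
# @retry(wait=wait_random_exponential(min=1, max=20), stop=stop_after_attempt(3))
# def completion_with_backoff(**kwargs):
#     return openai.Completion.create(**kwargs)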
10 | 11 | def prompts(name, description): 12 | def decorator(func): 13 | func.name = name 14 | func.description = description 15 | return func 16 | 17 | return decorator 18 | 19 | class SentenceRefine: 20 | template_model = True 21 | def __init__(self, device="cuda:0"): 22 | self.device = device 23 | 24 | @prompts(name="SentenceRefine", 25 | description="A sentence refinement agent. " 26 | "Useful for when you need to revise and slightly expand a sentence, " 27 | "Input should be the sentence to refine.") 28 | def inference(self, text): 29 | prompt = "Please revise and slightly expand the following sentence: " + text 30 | self.instructgpt = openai.Completion.create( 31 | engine="text-davinci-003", 32 | prompt=prompt, 33 | temperature=0.5, 34 | max_tokens=200, 35 | ) 36 | answer = self.instructgpt["choices"][0]["text"].strip("\n") 37 | return answer 38 | 39 | if __name__ == "__main__": 40 | refine = SentenceRefine() 41 | ans = refine.inference("AGI is coming.") 42 | # AGI (Artificial General Intelligence) is rapidly approaching, with many experts predicting that it will be achieved within the next few decades. 43 | print(ans) -------------------------------------------------------------------------------- /society/skin_retouching/agent.py: -------------------------------------------------------------------------------- 1 | from modelscope.pipelines import pipeline 2 | from modelscope.utils.constant import Tasks 3 | from modelscope.outputs import OutputKeys 4 | import torch 5 | import os 6 | import uuid 7 | import cv2 8 | 9 | def prompts(name, description): 10 | def decorator(func): 11 | func.name = name 12 | func.description = description 13 | return func 14 | 15 | return decorator 16 | 17 | 18 | class ABPN: 19 | def __init__(self, device="cuda:0"): 20 | self.device = device 21 | self.torch_dtype = torch.float16 if 'cuda' in device else torch.float32 22 | model_id = 'damo/cv_unet_skin-retouching' 23 | self.pipeline_skin_retouching = pipeline(Tasks.skin_retouching, model=model_id) 24 | 25 | @prompts(name="ABPN", 26 | description="Useful when you want to make the face in the photo more beautiful. Receives image_path as input." 27 | "Applications involving skin beautification, such as photo retouching." 
28 | "The input to this tool should be a string, representing the image_path.") 29 | def inference(self, image_path): 30 | 31 | image_filename = os.path.join('data', f"{str(uuid.uuid4())[:8]}.png") 32 | #image_filename = "result.png" 33 | image_path = image_path.strip("\n") 34 | result = self.pipeline_skin_retouching(image_path) 35 | cv2.imwrite(image_filename, result[OutputKeys.OUTPUT_IMG]) 36 | return image_filename 37 | 38 | 39 | if __name__ == "__main__": 40 | skin_touching_model = ABPN(device="cuda:0") 41 | image = skin_touching_model.inference("xyz456.png") -------------------------------------------------------------------------------- /society/text_to_image/agent.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | import os 3 | import io 4 | import torch 5 | from PIL import Image 6 | import requests 7 | 8 | def prompts(name, description): 9 | def decorator(func): 10 | func.name = name 11 | func.description = description 12 | return func 13 | 14 | return decorator 15 | 16 | class AnythingV4: 17 | def __init__(self, device="cpu"): 18 | self.device = device 19 | self.API_URL = "https://api-inference.huggingface.co/models/andite/anything-v4.0" 20 | self.headers = {"Authorization": "Bearer "+os.getenv("HUGGINGFACE_ACCESS_Tokens")} 21 | 22 | self.a_prompt = 'best quality, extremely detailed' 23 | 24 | @prompts(name="AnythingV4", 25 | description="useful when you want to generate an image from a user input text and save it to a file. " 26 | "like: generate an image of an object or something, or generate an image that includes some objects. " 27 | "The input to this tool should be a string, representing the text used to generate image. ") 28 | def inference(self, text): 29 | 30 | image_filename = os.path.join('data', f"{str(uuid.uuid4())[:8]}.png") 31 | #image_filename = "result.png" 32 | prompt = str(text + ', ' + self.a_prompt) 33 | 34 | image_bytes = self.query({ 35 | "inputs": prompt, 36 | }) 37 | 38 | image = Image.open(io.BytesIO(image_bytes)) 39 | image.save(image_filename) 40 | return image_filename 41 | 42 | def query(self, payload): 43 | response = requests.post(self.API_URL, headers=self.headers, json=payload) 44 | return response.content 45 | 46 | 47 | # class AnythingV3: 48 | # def __init__(self, device="cpu"): 49 | # self.device = device 50 | # self.API_URL = "https://api-inference.huggingface.co/models/Linaqruf/anything-v3.0" 51 | # self.headers = {"Authorization": "Bearer "+os.getenv("HUGGINGFACE_ACCESS_Tokens")} 52 | 53 | # self.a_prompt = 'best quality, extremely detailed' 54 | 55 | # @prompts(name="AnythingV3", 56 | # description="useful when you want to generate an image from a user input text and save it to a file. " 57 | # "like: generate an image of an object or something, or generate an image that includes some objects. " 58 | # "The input to this tool should be a string, representing the text used to generate image. 
") 59 | # def inference(self, text): 60 | 61 | # image_filename = os.path.join('data', f"{str(uuid.uuid4())[:8]}.png") 62 | # #image_filename = "result.png" 63 | # prompt = str(text + ', ' + self.a_prompt) 64 | 65 | # image_bytes = self.query({ 66 | # "inputs": prompt, 67 | # }) 68 | 69 | # image = Image.open(io.BytesIO(image_bytes)) 70 | # image.save(image_filename) 71 | # return image_filename 72 | 73 | # def query(self, payload): 74 | # response = requests.post(self.API_URL, headers=self.headers, json=payload) 75 | # return response.content 76 | 77 | 78 | # class OpenJourneyV4: 79 | # def __init__(self, device="cpu"): 80 | # self.device = device 81 | # self.API_URL = "https://api-inference.huggingface.co/models/prompthero/openjourney-v4" 82 | # self.headers = {"Authorization": "Bearer "+os.getenv("HUGGINGFACE_ACCESS_Tokens")} 83 | 84 | # self.a_prompt = 'best quality, extremely detailed' 85 | 86 | # @prompts(name="OpenJourneyV4", 87 | # description="useful when you want to generate an image from a user input text and save it to a file. " 88 | # "like: generate an image of an object or something, or generate an image that includes some objects. " 89 | # "The input to this tool should be a string, representing the text used to generate image. ") 90 | # def inference(self, text): 91 | 92 | # image_filename = os.path.join('data', f"{str(uuid.uuid4())[:8]}.png") 93 | # #image_filename = "result.png" 94 | # prompt = str(text + ', ' + self.a_prompt) 95 | 96 | # image_bytes = self.query({ 97 | # "inputs": prompt, 98 | # }) 99 | 100 | # image = Image.open(io.BytesIO(image_bytes)) 101 | # image.save(image_filename) 102 | # return image_filename 103 | 104 | # def query(self, payload): 105 | # response = requests.post(self.API_URL, headers=self.headers, json=payload) 106 | # return response.content 107 | 108 | 109 | # class OpenJourney: 110 | # def __init__(self, device="cpu"): 111 | # self.device = device 112 | # self.API_URL = "https://api-inference.huggingface.co/models/prompthero/openjourney" 113 | # self.headers = {"Authorization": "Bearer "+os.getenv("HUGGINGFACE_ACCESS_Tokens")} 114 | 115 | # self.a_prompt = 'best quality, extremely detailed' 116 | 117 | # @prompts(name="OpenJourney", 118 | # description="useful when you want to generate an image from a user input text and save it to a file. " 119 | # "like: generate an image of an object or something, or generate an image that includes some objects. " 120 | # "The input to this tool should be a string, representing the text used to generate image. 
") 121 | # def inference(self, text): 122 | 123 | # image_filename = os.path.join('data', f"{str(uuid.uuid4())[:8]}.png") 124 | # #image_filename = "result.png" 125 | # prompt = str(text + ', ' + self.a_prompt) 126 | 127 | # image_bytes = self.query({ 128 | # "inputs": prompt, 129 | # }) 130 | 131 | # image = Image.open(io.BytesIO(image_bytes)) 132 | # image.save(image_filename) 133 | # return image_filename 134 | 135 | # def query(self, payload): 136 | # response = requests.post(self.API_URL, headers=self.headers, json=payload) 137 | # return response.content 138 | 139 | 140 | # class StableDiffusionV15: 141 | # def __init__(self, device="cpu"): 142 | # self.device = device 143 | # self.API_URL = "https://api-inference.huggingface.co/models/runwayml/stable-diffusion-v1-5" 144 | # self.headers = {"Authorization": "Bearer "+os.getenv("HUGGINGFACE_ACCESS_Tokens")} 145 | 146 | # self.a_prompt = 'best quality, extremely detailed' 147 | 148 | # @prompts(name="StableDiffusionV15", 149 | # description="useful when you want to generate an image from a user input text and save it to a file. " 150 | # "like: generate an image of an object or something, or generate an image that includes some objects. " 151 | # "The input to this tool should be a string, representing the text used to generate image. ") 152 | # def inference(self, text): 153 | 154 | # image_filename = os.path.join('data', f"{str(uuid.uuid4())[:8]}.png") 155 | # #image_filename = "result.png" 156 | # prompt = str(text + ', ' + self.a_prompt) 157 | 158 | # image_bytes = self.query({ 159 | # "inputs": prompt, 160 | # }) 161 | 162 | # image = Image.open(io.BytesIO(image_bytes)) 163 | # image.save(image_filename) 164 | # return image_filename 165 | 166 | # def query(self, payload): 167 | # response = requests.post(self.API_URL, headers=self.headers, json=payload) 168 | # return response.content 169 | 170 | 171 | # class StableDiffusionV21B: 172 | # def __init__(self, device="cpu"): 173 | # self.device = device 174 | # self.API_URL = "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-2-1-base" 175 | # self.headers = {"Authorization": "Bearer "+os.getenv("HUGGINGFACE_ACCESS_Tokens")} 176 | 177 | # self.a_prompt = 'best quality, extremely detailed' 178 | 179 | # @prompts(name="StableDiffusionV21B", 180 | # description="useful when you want to generate an image from a user input text and save it to a file. " 181 | # "like: generate an image of an object or something, or generate an image that includes some objects. " 182 | # "The input to this tool should be a string, representing the text used to generate image. 
") 183 | # def inference(self, text): 184 | 185 | # image_filename = os.path.join('data', f"{str(uuid.uuid4())[:8]}.png") 186 | # #image_filename = "result.png" 187 | # prompt = str(text + ', ' + self.a_prompt) 188 | 189 | # image_bytes = self.query({ 190 | # "inputs": prompt, 191 | # }) 192 | 193 | # image = Image.open(io.BytesIO(image_bytes)) 194 | # image.save(image_filename) 195 | # return image_filename 196 | 197 | # def query(self, payload): 198 | # response = requests.post(self.API_URL, headers=self.headers, json=payload) 199 | # return response.content 200 | 201 | 202 | # class StableDiffusionV21: 203 | # def __init__(self, device="cpu"): 204 | # self.device = device 205 | # self.API_URL = "https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-2-1" 206 | # self.headers = {"Authorization": "Bearer "+os.getenv("HUGGINGFACE_ACCESS_Tokens")} 207 | 208 | # self.a_prompt = 'best quality, extremely detailed' 209 | 210 | # @prompts(name="StableDiffusionV21", 211 | # description="useful when you want to generate an image from a user input text and save it to a file. " 212 | # "like: generate an image of an object or something, or generate an image that includes some objects. " 213 | # "The input to this tool should be a string, representing the text used to generate image. ") 214 | # def inference(self, text): 215 | 216 | # image_filename = os.path.join('data', f"{str(uuid.uuid4())[:8]}.png") 217 | # #image_filename = "result.png" 218 | # prompt = str(text + ', ' + self.a_prompt) 219 | 220 | # image_bytes = self.query({ 221 | # "inputs": prompt, 222 | # }) 223 | 224 | # image = Image.open(io.BytesIO(image_bytes)) 225 | # image.save(image_filename) 226 | # return image_filename 227 | 228 | # def query(self, payload): 229 | # response = requests.post(self.API_URL, headers=self.headers, json=payload) 230 | # return response.content 231 | 232 | 233 | if __name__ == "__main__": 234 | 235 | t2i = AnythingV4() 236 | t2i.inference("Chinese SuperMan in the Old Street.") -------------------------------------------------------------------------------- /society/text_to_speech/agent.py: -------------------------------------------------------------------------------- 1 | # https://github.com/coqui-ai/TTS 2 | import uuid 3 | import os 4 | import io 5 | from TTS.api import TTS 6 | # Running a multi-speaker and multi-lingual model 7 | 8 | 9 | def prompts(name, description): 10 | def decorator(func): 11 | func.name = name 12 | func.description = description 13 | return func 14 | 15 | return decorator 16 | 17 | class TTS: 18 | def __init__(self, device): 19 | self.device = device 20 | self.torch_dtype = "cpu" 21 | model_name = TTS.list_models()[0] 22 | self.tts = TTS(model_name) 23 | 24 | 25 | @prompts(name="TTS", 26 | description="useful when you want to generate an audio from a user input text and save it to a file. " 27 | "The input to this tool should be a string, representing the text used to generate audio. 
") 28 | def inference(self, text): 29 | 30 | audio_filename = os.path.join('data', f"{str(uuid.uuid4())[:8]}.wav") 31 | #audio_filename = f"{str(uuid.uuid4())[:8]}.wav" 32 | 33 | self.tts.tts_to_file(text=text, speaker=self.tts.speakers[0], language=self.tts.languages[0], file_path=audio_filename) 34 | 35 | return audio_filename 36 | 37 | if __name__ == "__main__": 38 | tts_model = TTS(device="cuda:0") 39 | image = tts_model.inference("Wo zong shi lin shi bao fo jiao, lin shi bao fo jiao.") -------------------------------------------------------------------------------- /society/text_to_video/agent.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | import os 3 | import replicate 4 | import urllib.request 5 | import openai 6 | 7 | openai.api_key = os.getenv('OPENAI_API_KEY') 8 | 9 | def prompts(name, description): 10 | def decorator(func): 11 | func.name = name 12 | func.description = description 13 | return func 14 | 15 | return decorator 16 | 17 | 18 | class DeforumSD: 19 | def __init__(self, device="cpu"): 20 | self.device = device 21 | 22 | @prompts(name="DeforumSD", 23 | description="useful when you want to generate a video from a user input text and save it to a file. " 24 | "like: generate an video of an object or something, or generate an video that includes some objects. " 25 | "The input to this tool should be a string, representing the text used to generate video. ") 26 | def inference(self, text): 27 | 28 | 29 | script_prompt = "Unlock your creativity with artistic expression, then slighly expand this: " + text 30 | director = openai.Completion.create( 31 | engine="text-davinci-003", 32 | prompt=script_prompt, 33 | temperature=0.5, 34 | max_tokens=200, 35 | ) 36 | script = director["choices"][0]["text"].strip("\n") 37 | 38 | video_filename = os.path.join('data', f"{str(uuid.uuid4())[:8]}.mp4") 39 | 40 | output = replicate.run( 41 | "deforum/deforum_stable_diffusion:e22e77495f2fb83c34d5fae2ad8ab63c0a87b6b573b6208e1535b23b89ea66d6", 42 | input={"max_frames": 300, 43 | "animation_prompts": str(script) 44 | } 45 | ) 46 | 47 | urllib.request.urlretrieve(output, video_filename) 48 | print(output) 49 | 50 | return output#video_filename 51 | 52 | 53 | if __name__ == "__main__": 54 | t2v_model = DeforumSD(device="cpu") 55 | t2v_model.inference("0: "+ "Superman is saving the New York city! 
") -------------------------------------------------------------------------------- /society/vqa/agent.py: -------------------------------------------------------------------------------- 1 | from modelscope.pipelines import pipeline 2 | from modelscope.utils.constant import Tasks 3 | from transformers import Blip2Processor, Blip2ForConditionalGeneration, AutoProcessor, AutoModelForCausalLM 4 | from huggingface_hub import hf_hub_download 5 | from modelscope.outputs import OutputKeys 6 | from modelscope.preprocessors.multi_modal import OfaPreprocessor 7 | import torch 8 | from PIL import Image 9 | 10 | def prompts(name, description): 11 | def decorator(func): 12 | func.name = name 13 | func.description = description 14 | return func 15 | 16 | return decorator 17 | 18 | class BLIP2_VQA: 19 | def __init__(self, device="cuda:0"): 20 | self.device = device 21 | self.torch_dtype = torch.float16 if 'cuda' in device else torch.float32 22 | model_id = 'Salesforce/blip2-flan-t5-xl' 23 | self.processor = Blip2Processor.from_pretrained(model_id) 24 | self.BLIP2_MODEL = Blip2ForConditionalGeneration.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto") 25 | 26 | @prompts(name="BLIP2_VQA", 27 | description="Useful when you want to ask a question about the image. Receives image_path and question as inputs. " 28 | "When using this model, you can also consider to ask more question according to previous question." 29 | "If there are other VQA methods, it is also suggested to utilize other VQA methods to enhance your ability to answer the questions." 30 | "The input to this tool should be a string, representing the image_path.") 31 | def inference(self, input): 32 | try: 33 | image_path, question = input.split(",")[0].strip(), input.split(",")[1].strip() 34 | image_path = image_path.strip("\n") 35 | raw_image = Image.open(image_path).convert("RGB") 36 | except: 37 | print("No question as input, use the template: \"Describe this image in details\" as question") 38 | image_path = input.strip().strip("\n") 39 | raw_image = Image.open(image_path).convert("RGB") 40 | question = "Describe this image in details." 41 | 42 | input = self.processor(raw_image, question+"Answer with reason.", return_tensors="pt").to("cuda", torch.float16) 43 | generated_answer_ids = self.BLIP2_MODEL.generate(**input, max_new_tokens=20) 44 | answer = self.processor.batch_decode(generated_answer_ids, skip_special_tokens=True)[0].strip() 45 | 46 | return answer 47 | 48 | class mPLUG_VQA: 49 | def __init__(self, device="cuda:0"): 50 | self.device = device 51 | self.torch_dtype = torch.float16 if 'cuda' in device else torch.float32 52 | model_id = 'damo/mplug_visual-question-answering_coco_large_en' 53 | self.pipeline_caption = pipeline(Tasks.visual_question_answering, model=model_id) 54 | 55 | @prompts(name="mPLUG_VQA", 56 | description="Useful when you want to ask some question about the image. Receives image_path and language-based question as inputs. " 57 | "When using this model, you can also consider to ask more question according to previous question." 58 | "If there are other VQA methods, it is also suggested to utilize other VQA methods to enhance your ability to answer the questions." 59 | "The input to this tool should be a string, representing the image_path. 
") 60 | def inference(self, input): 61 | try: 62 | image_path, question = input.split(",")[0].strip(), input.split(",")[1].strip() 63 | except: 64 | print("No question as input, use the template: \"Describe this image in details\" as question") 65 | image_path = input.strip() 66 | question = "Describe this image in details." 67 | 68 | image_path = image_path.strip("\n") 69 | input = {'image': image_path, 'text': question+"Let's think step by step."} 70 | answer = self.pipeline_caption(input)["text"] 71 | print(f"\nProcessed VQA, Input Image: {image_path}, Input Question: {question}, Output Text: {answer}") 72 | return answer 73 | 74 | 75 | 76 | class OFA_VQA: 77 | def __init__(self, device="cuda:0"): 78 | print(f"Initializing OFA to {device}") 79 | self.device = device 80 | self.torch_dtype = torch.float16 if 'cuda' in device else torch.float32 81 | model_id = 'damo/ofa_visual-question-answering_pretrain_large_en' 82 | self.preprocessor = OfaPreprocessor(model_dir=model_id) 83 | self.ofa_pipe = pipeline( 84 | Tasks.visual_question_answering, 85 | model=model_id, 86 | model_revision='v1.0.1', 87 | preprocessor=self.preprocessor) 88 | 89 | @prompts(name="OFA_VQA", 90 | description="Useful when you want to ask some question about the image. Receives image_path and language-based question as inputs. " 91 | "When using this model, you can also consider to ask more question according to previous question." 92 | "If there are other VQA methods, it is also suggested to utilize other VQA methods to enhance your ability to answer the questions." 93 | "The input to this tool should be a string, representing the image_path. ") 94 | def inference(self, input): 95 | try: 96 | image_path, question = input.split(",")[0].strip(), input.split(",")[1].strip() 97 | except: 98 | print("No question as input, use the template: \"Describe this image in details\" as question") 99 | image_path = input.strip() 100 | question = "Describe this image in details." 
101 | 102 | input = {'image': image_path, 'text': question+"Answer with reason."} 103 | answer = self.ofa_pipe(input)[OutputKeys.TEXT][0] 104 | print(f"\nProcessed VQA, Input Image: {image_path}, Input Question: {question}, Output Text: {answer}") 105 | return answer 106 | 107 | 108 | if __name__ == "__main__": 109 | #vqa = mPLUG_VQA("cuda:0") 110 | vqa = BLIP2_VQA("cuda:0") 111 | print(vqa.inference("xyz.png")) -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import re 4 | import shutil 5 | import sys 6 | from typing import List 7 | 8 | #import deeplake 9 | import openai 10 | import streamlit as st 11 | #from dotenv import load_dotenv 12 | from langchain.callbacks import OpenAICallbackHandler 13 | from langchain.chains.conversation.memory import ConversationBufferMemory 14 | from langchain.chat_models import ChatOpenAI 15 | from langchain.agents.initialize import initialize_agent 16 | from langchain.llms.openai import OpenAI 17 | 18 | 19 | from langchain.document_loaders import ( 20 | CSVLoader, 21 | DirectoryLoader, 22 | GitLoader, 23 | NotebookLoader, 24 | OnlinePDFLoader, 25 | PythonLoader, 26 | TextLoader, 27 | UnstructuredFileLoader, 28 | UnstructuredHTMLLoader, 29 | UnstructuredPDFLoader, 30 | UnstructuredWordDocumentLoader, 31 | WebBaseLoader, 32 | ) 33 | from langchain.schema import Document 34 | from langchain.text_splitter import RecursiveCharacterTextSplitter 35 | from streamlit.runtime.uploaded_file_manager import UploadedFile 36 | 37 | from setting import ( 38 | APP_NAME, 39 | CHUNK_SIZE, 40 | DATA_PATH, 41 | PAGE_ICON, 42 | REPO_URL, 43 | ) 44 | 45 | from env.prompt import ( 46 | NLSOM_PREFIX, 47 | NLSOM_FORMAT_INSTRUCTIONS, 48 | NLSOM_SUFFIX, 49 | ) 50 | 51 | 52 | 53 | logger = logging.getLogger(APP_NAME) 54 | 55 | 56 | def configure_logger(debug: int = 0) -> None: 57 | # boilerplate code to enable logging in the streamlit app console 58 | log_level = logging.DEBUG if debug == 1 else logging.INFO 59 | logger.setLevel(log_level) 60 | 61 | stream_handler = logging.StreamHandler(stream=sys.stdout) 62 | stream_handler.setLevel(log_level) 63 | 64 | formatter = logging.Formatter("%(message)s") 65 | 66 | stream_handler.setFormatter(formatter) 67 | 68 | logger.addHandler(stream_handler) 69 | logger.propagate = False 70 | 71 | 72 | configure_logger(0) 73 | 74 | 75 | def authenticate( 76 | openai_api_key = None, 77 | huggingface_api_key = None, 78 | bingsearch_api_key = None, 79 | wolframalpha_api_key = None, 80 | replicate_api_key = None, 81 | ) -> None: 82 | # Validate all credentials are set and correct 83 | # Check for env variables to enable local dev and deployments with shared credentials 84 | 85 | openai_api_key = ( 86 | openai_api_key 87 | or os.environ.get("OPENAI_API_KEY") 88 | or st.secrets.get("OPENAI_API_KEY") 89 | ) 90 | st.session_state["auth_ok"] = True 91 | if not (openai_api_key): 92 | st.session_state["auth_ok"] = False 93 | st.error("Credentials neither set nor stored", icon=PAGE_ICON) 94 | return 95 | try: 96 | # Try to access openai and deeplake 97 | with st.spinner("Authentifying..."): 98 | openai.api_key = openai_api_key 99 | openai.Model.list() 100 | 101 | except Exception as e: 102 | logger.error(f"Authentication failed with {e}") 103 | st.session_state["auth_ok"] = False 104 | st.error("Authentication failed", icon=PAGE_ICON) 105 | return 106 | 107 | # store credentials in the session state 108 | 
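    # Each provider block below repeats the same pattern: prefer the key passed in from the UI,
    # otherwise fall back to st.secrets, and finally export it as an environment variable so the
    # agents can read it. A hypothetical helper capturing this pattern could look as follows
    # (sketch only, not used by the code below):
    #
    # def store_credential(passed_key, secret_name, env_name):
    #     key = passed_key or st.secrets.get(secret_name)
    #     if key is None:
    #         logger.info(f"Authentication - {secret_name} - Fail!")
    #         return
    #     st.session_state[secret_name.lower()] = key
    #     os.environ[env_name] = key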
109 | # OpenAI 110 | if openai_api_key == None and st.secrets["OPENAI_API_KEY"] == None: 111 | logger.info("Authentification - OpenAI - Fail!") 112 | else: 113 | st.session_state["openai_api_key"] = openai_api_key \ 114 | if openai_api_key \ 115 | else st.secrets["OPENAI_API_KEY"] 116 | 117 | os.environ["OPENAI_API_KEY"] = st.session_state["openai_api_key"] 118 | 119 | 120 | # Huggingface 121 | if huggingface_api_key == None and st.secrets["HUGGINGFACE_ACCESS_Tokens"] == None: 122 | logger.info("Authentification - Huggingface - Fail!") 123 | else: 124 | st.session_state["huggingface_api_key"] = huggingface_api_key \ 125 | if huggingface_api_key \ 126 | else st.secrets["HUGGINGFACE_ACCESS_Tokens"] 127 | os.environ["HUGGINGFACE_ACCESS_Tokens"] = st.session_state["huggingface_api_key"] 128 | 129 | # Wolframalpha 130 | if wolframalpha_api_key == None and st.secrets["WOLFRAM_ALPHA_APPID"] == None: 131 | logger.info("Authentification - WolframAlpha - Fail!") 132 | else: 133 | st.session_state["wolframalpha_api_key"] = wolframalpha_api_key \ 134 | if wolframalpha_api_key \ 135 | else st.secrets["WOLFRAM_ALPHA_APPID"] 136 | os.environ["WOLFRAM_ALPHA_APPID"] = st.session_state["wolframalpha_api_key"] 137 | 138 | 139 | # BingSearch 140 | if bingsearch_api_key == None and st.secrets["BING_SUBSCRIPTION_KEY"] == None: 141 | logger.info("Authentification - BingSearch - Fail!") 142 | else: 143 | st.session_state["bingsearch_api_key"] = bingsearch_api_key \ 144 | if bingsearch_api_key \ 145 | else st.secrets["BING_SUBSCRIPTION_KEY"] 146 | os.environ["BING_SUBSCRIPTION_KEY"] = st.session_state["bingsearch_api_key"] 147 | os.environ["BING_SEARCH_URL"] = "https://api.bing.microsoft.com/v7.0/search" 148 | 149 | 150 | # Replicate 151 | if replicate_api_key == None and st.secrets["REPLICATE_API_TOKEN"] == None: 152 | logger.info("Authentification - Replicate - Fail!") 153 | else: 154 | st.session_state["replicate_api_key"] = replicate_api_key \ 155 | if replicate_api_key \ 156 | else st.secrets["REPLICATE_API_TOKEN"] 157 | os.environ["REPLICATE_API_TOKEN"] = st.session_state["replicate_api_key"] 158 | 159 | #st.session_state["auth_ok"] = True 160 | 161 | def save_uploaded_file(uploaded_file: UploadedFile) -> str: 162 | # streamlit uploaded files need to be stored locally 163 | # before embedded and uploaded to the hub 164 | if not os.path.exists(DATA_PATH): 165 | os.makedirs(DATA_PATH) 166 | file_path = str(DATA_PATH / uploaded_file.name) 167 | uploaded_file.seek(0) 168 | file_bytes = uploaded_file.read() 169 | file = open(file_path, "wb") 170 | file.write(file_bytes) 171 | file.close() 172 | logger.info(f"Saved: {file_path}") 173 | return file_path 174 | 175 | 176 | def delete_uploaded_file(uploaded_file: UploadedFile) -> None: 177 | # cleanup locally stored files 178 | file_path = DATA_PATH / uploaded_file.name 179 | if os.path.exists(DATA_PATH): 180 | os.remove(file_path) 181 | logger.info(f"Removed: {file_path}") 182 | 183 | 184 | def handle_load_error(e: str = None) -> None: 185 | error_msg = f"Failed to load '{st.session_state['data_source']}':\n\n{e}" 186 | st.error(error_msg, icon=PAGE_ICON) 187 | logger.error(error_msg) 188 | st.stop() 189 | 190 | 191 | def load_git(data_source: str, chunk_size: int = CHUNK_SIZE) -> List[Document]: 192 | # We need to try both common main branches 193 | # Thank you github for the "master" to "main" switch 194 | # we need to make sure the data path exists 195 | if not os.path.exists(DATA_PATH): 196 | os.makedirs(DATA_PATH) 197 | repo_name = 
data_source.split("/")[-1].split(".")[0] 198 | repo_path = str(DATA_PATH / repo_name) 199 | clone_url = data_source 200 | if os.path.exists(repo_path): 201 | clone_url = None 202 | text_splitter = RecursiveCharacterTextSplitter( 203 | chunk_size=chunk_size, chunk_overlap=0 204 | ) 205 | branches = ["main", "master"] 206 | for branch in branches: 207 | try: 208 | docs = GitLoader(repo_path, clone_url, branch).load_and_split(text_splitter) 209 | break 210 | except Exception as e: 211 | logger.error(f"Error loading git: {e}") 212 | if os.path.exists(repo_path): 213 | # cleanup repo afterwards 214 | shutil.rmtree(repo_path) 215 | try: 216 | return docs 217 | except: 218 | msg = "Make sure to use HTTPS git repo links" 219 | handle_load_error(msg) 220 | 221 | 222 | def load_any_data_source( 223 | data_source: str, chunk_size: int = CHUNK_SIZE 224 | ) -> List[Document]: 225 | # Ugly thing that decides how to load data 226 | # It aint much, but it's honest work 227 | is_img = data_source.endswith(".png") 228 | is_video = data_source.endswith(".mp4") 229 | is_audio = data_source.endswith(".wav") 230 | is_text = data_source.endswith(".txt") 231 | is_web = data_source.startswith("http") 232 | is_pdf = data_source.endswith(".pdf") 233 | is_csv = data_source.endswith("csv") 234 | is_html = data_source.endswith(".html") 235 | is_git = data_source.endswith(".git") 236 | is_notebook = data_source.endswith(".ipynb") 237 | is_doc = data_source.endswith(".doc") 238 | is_py = data_source.endswith(".py") 239 | is_dir = os.path.isdir(data_source) 240 | is_file = os.path.isfile(data_source) 241 | 242 | loader = None 243 | if is_dir: 244 | loader = DirectoryLoader(data_source, recursive=True, silent_errors=True) 245 | elif is_git: 246 | return load_git(data_source, chunk_size) 247 | elif is_web: 248 | if is_pdf: 249 | loader = OnlinePDFLoader(data_source) 250 | else: 251 | loader = WebBaseLoader(data_source) 252 | elif is_file: 253 | if is_text: 254 | loader = TextLoader(data_source, encoding="utf-8") 255 | elif is_notebook: 256 | loader = NotebookLoader(data_source) 257 | elif is_pdf: 258 | loader = UnstructuredPDFLoader(data_source) 259 | elif is_html: 260 | loader = UnstructuredHTMLLoader(data_source) 261 | elif is_doc: 262 | loader = UnstructuredWordDocumentLoader(data_source) 263 | elif is_csv: 264 | loader = CSVLoader(data_source, encoding="utf-8") 265 | elif is_py: 266 | loader = PythonLoader(data_source) 267 | else: 268 | loader = UnstructuredFileLoader(data_source) 269 | try: 270 | # Chunk size is a major trade-off parameter to control result accuracy over computaion 271 | text_splitter = RecursiveCharacterTextSplitter( 272 | chunk_size=chunk_size, chunk_overlap=0 273 | ) 274 | docs = loader.load_and_split(text_splitter) 275 | logger.info(f"Loaded: {len(docs)} document chucks") 276 | return docs 277 | except Exception as e: 278 | msg = ( 279 | e 280 | if loader 281 | else f"No Loader found for your data source. Consider contributing:  {REPO_URL}!" 
282 | ) 283 | handle_load_error(msg) 284 | 285 | 286 | def update_usage(cb: OpenAICallbackHandler) -> None: 287 | # Accumulate API call usage via callbacks 288 | logger.info(f"Usage: {cb}") 289 | callback_properties = [ 290 | "total_tokens", 291 | "prompt_tokens", 292 | "completion_tokens", 293 | "total_cost", 294 | ] 295 | for prop in callback_properties: 296 | value = getattr(cb, prop, 0) 297 | st.session_state["usage"].setdefault(prop, 0) 298 | st.session_state["usage"][prop] += value 299 | 300 | 301 | def cut_dialogue_history(history_memory, keep_last_n_words=500): 302 | if history_memory is None or len(history_memory) == 0: 303 | return history_memory 304 | 305 | tokens = str(history_memory).replace("[(", "").replace(")]", "").split() 306 | n_tokens = len(tokens) 307 | if n_tokens < keep_last_n_words: 308 | return history_memory 309 | paragraphs = history_memory.split('\n') 310 | last_n_tokens = n_tokens 311 | while last_n_tokens >= keep_last_n_words: 312 | last_n_tokens -= len(paragraphs[0].split(' ')) 313 | paragraphs = paragraphs[1:] 314 | return '\n' + '\n'.join(paragraphs) 315 | 316 | def generate_response(prompt: str, tools, history) -> str: 317 | 318 | # OpenAI Agent 319 | nlsom_organizer = OpenAI(temperature=0) 320 | nlsom_memory = ConversationBufferMemory(memory_key="chat_history", output_key="output") 321 | 322 | mindstorm = initialize_agent( 323 | tools, 324 | nlsom_organizer, 325 | agent="conversational-react-description", 326 | verbose=True, 327 | memory=nlsom_memory, 328 | return_intermediate_steps=True, 329 | agent_kwargs={'prefix': NLSOM_PREFIX, 'format_instructions': NLSOM_FORMAT_INSTRUCTIONS, 330 | 'suffix': NLSOM_SUFFIX}, ) 331 | 332 | mindstorm.memory.chat_memory.add_user_message(st.session_state["chat_history"][0][0]) 333 | mindstorm.memory.chat_memory.add_user_message(st.session_state["chat_history"][0][1]) 334 | 335 | response = mindstorm({'input': prompt.strip()}) 336 | response['output'] = response['output'].replace("\\", "/") 337 | response = re.sub('(data/[-\w]*.png)', lambda m: f'![](file={m.group(0)})*{m.group(0)}*', response['output']) 338 | 339 | logger.info(f"Response: '{response}'") 340 | st.session_state["chat_history"].append((prompt, response)) 341 | return response 342 | --------------------------------------------------------------------------------
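For reference, here is a minimal sketch of how one of the decorated agents above could be exposed to generate_response as a LangChain tool. The wiring is illustrative only: the actual registration lives in app.py (not included in this listing), it assumes an active Streamlit session because generate_response reads st.session_state["chat_history"], and the image path is a placeholder.

from langchain.tools import Tool
from society.ocr.agent import EasyOCR
from utils import generate_response

ocr = EasyOCR(device="cpu")
# The @prompts decorator attaches .name and .description to the inference method,
# so the tool metadata can be read straight off the agent.
tools = [Tool(name=ocr.inference.name,
              description=ocr.inference.description,
              func=ocr.inference)]

answer = generate_response("What text appears in data/example.png?", tools, history=None)
print(answer)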