--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Quest
2 | This is a web app that integrates GPT-3 with Google searches.
3 |
4 | This analyses the first few results from a Google search and stores snippets of text so the Assistant can remember facts. You can then ask the Assistant about those facts, and it will retrieve the relevant information gathered previously.
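
The retrieval step works by embedding each stored snippet and ranking snippets by their similarity to the question. Here is a minimal sketch of that idea, using plain cosine similarity over toy vectors rather than the app's actual OpenAI embeddings:

```python
import math

def cosine_similarity(a, b):
    # Cosine similarity between two equal-length vectors.
    dot = sum(x * y for x, y in zip(a, b))
    norm = math.sqrt(sum(x * x for x in a)) * math.sqrt(sum(y * y for y in b))
    return dot / norm

def top_similar(snippets, query_vec, n=2):
    # Rank stored (text, embedding) snippets by similarity to the query vector.
    ranked = sorted(snippets, key=lambda s: cosine_similarity(s[1], query_vec), reverse=True)
    return [text for text, _ in ranked[:n]]

# Toy 3-dimensional "embeddings" standing in for the real embedding vectors.
snippets = [
    ("Paris is the capital of France.", [0.9, 0.1, 0.0]),
    ("Bananas are rich in potassium.", [0.0, 0.2, 0.9]),
    ("The Eiffel Tower is in Paris.", [0.8, 0.2, 0.1]),
]
query = [1.0, 0.0, 0.0]  # a question "about Paris"
print(top_similar(snippets, query))
# → ['Paris is the capital of France.', 'The Eiffel Tower is in Paris.']
```

The real app does the same ranking with OpenAI embedding vectors stored alongside each snippet.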
5 |
6 | Anyone willing to help build this tool is welcome to send merge requests.
7 |
8 | ## How can I try this out?
9 |
10 | First, you will need an API key from OpenAI. [Click here to get your API key](https://beta.openai.com/account/api-keys). Make sure to store this key in a safe place.
11 |
12 | Currently the web app is deployed [here](https://aichat.streamlit.app/).
13 |
14 | ## Usage
15 |
16 | There are three main sections in this app: 'Assistant settings', 'Ask the Assistant', and 'Google search'.
17 |
18 | 
19 |
20 | ---
21 | ### Google search
22 |
23 | To use the Google search box, enter a query in the text input and hit 'Submit'. This triggers a search using your query. The results are listed and stored locally, so the Assistant can later use them to answer questions.
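
Because results are stored locally, repeated queries can be served from the local history instead of hitting the web again. A rough sketch of that caching pattern, using a plain dict instead of the app's pandas-based history and a stubbed-out search function:

```python
# Hypothetical stand-in for the real web search call.
def fake_search(query):
    return [f"snippet about {query} #1", f"snippet about {query} #2"]

search_history = {}  # query -> stored result snippets

def search_with_cache(query):
    # Repeated queries are served from the local history; only new ones "hit the web".
    if query not in search_history:
        search_history[query] = fake_search(query)
    return search_history[query]

first = search_with_cache("python tutorials")   # performs the "search"
second = search_with_cache("python tutorials")  # served from history
print(first is second)  # → True
```

The app applies the same idea per query and per link, merging cached and fresh results before ranking them.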
24 |
25 | 
26 |
27 | ---
28 | ### Ask me anything
29 |
30 | Using the most relevant search results and the most relevant chat history, the Assistant will answer your query. Its behaviour will differ depending on the settings you choose.
31 |
32 | 
33 |
34 | ---
35 | ### Assistant settings
36 |
37 | The Assistant settings determine how the Assistant behaves. If you set it to `Strictly Factual`, it will try not to state facts beyond the Google searches. `Creative` will still use the searches, but allows the Assistant to generate creative responses while being less concerned with factuality.
38 |
39 | ## Create your own Assistant
40 |
41 | There are a few default settings that can be chosen to customize how the Assistant behaves. However, you can create your own settings to open up many more possibilities. To do this, navigate to the `conversation_settings` folder and open the `_create_setting.py` file.
42 |
43 | 
44 |
45 | Inside, you can define your custom settings and give them a name. Then double-click `_create_setting_file` (`.bat` for Windows, `.sh` for macOS/Linux).
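
A custom setting consists of the four fields in `_create_setting.py`. Here is an illustrative example (the wording of the values is invented, not taken from the repo):

```python
# Illustrative values; the four field names mirror those in _create_setting.py.
setting_name = 'Movie buff'
mood = ("You are an enthusiastic AI assistant who loves discussing films. "
        "You don't have access to the internet beyond the searches the user provides.")
warn_assistant = ("ATTENTION: Base factual claims on the provided search results "
                  "and answer using markdown formatting.")
starting_conversation = ['User: Who are you?',
                         'Assistant: Hello! Ask me anything about movies.']
```

Running `_create_setting_file` then converts these values into a settings file that the app can load at startup.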
46 |
--------------------------------------------------------------------------------
/api_key.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | import database as db
3 | from utils import tell_to_reload_page
4 | from gpt_api import test_api_key
5 |
6 | # Load the API key into the session state
7 | def load_api_key():
8 |     if 'api_key' not in st.session_state:
9 |         try:
10 |             #api_key = db.get_api_key(st.session_state['username'])
11 |             #st.session_state['api_key'] = api_key
12 |             raise KeyError('api_key')  # DB persistence disabled; fall through to the form
13 |         except Exception:
14 |             api_key_form()
15 | return st.session_state['api_key']
16 |
17 |
18 | def api_key_form():
19 | with st.form('API Key'):
20 | api_key = st.text_input(label='Insert your API key here', type='password')
21 | api_submitted = st.form_submit_button("Submit")
22 | save_api_key = st.checkbox('Remember my key.',disabled=True)
23 |
24 | st.markdown("[Find your API key here](https://beta.openai.com/account/api-keys)")
25 |
26 | if api_submitted:
27 | test_api_key(api_key)
28 |
29 | # Associate the user API key with the user account
30 | if api_submitted and save_api_key:
31 | db.insert_api_key(st.session_state['username'], api_key)
32 |
33 | if api_submitted:
34 | st.session_state['api_key'] = api_key
35 | st.experimental_rerun()
36 |
37 | st.stop()
38 |
39 | def reset_api_key():
40 | #db.delete_api_key(st.session_state['username'])
41 | if 'api_key' in st.session_state:
42 | st.session_state.pop('api_key')
43 | st.experimental_rerun()
44 |
45 | def reset_key_button():
46 | st.button('Reset API Key', on_click=reset_api_key)
47 |
--------------------------------------------------------------------------------
/assistant.py:
--------------------------------------------------------------------------------
1 | import re
2 | import os
3 | import json
4 | import streamlit as st
5 | from streamlit_extras.add_vertical_space import add_vertical_space
6 | import pandas as pd
7 | from datetime import datetime
8 | from gpt_api import find_top_similar_results, gpt3_call
9 | from gpt_api import create_embedding
10 | from utils import markdown_litteral, num_of_tokens
11 | from internet_search import *
12 |
13 |
14 | def load_assistant_settings():
15 |     # Resolve the conversation_settings folder relative to this script
16 |     script_path = os.path.dirname(os.path.abspath(__file__))
17 |     folder_path = os.path.join(script_path, 'conversation_settings')
18 | file_names = os.listdir(folder_path)
19 | all_settings = []
20 | for file_name in file_names:
21 | if file_name.endswith('.json'):
22 | with open(os.path.join(folder_path, file_name)) as f:
23 | data = json.load(f)
24 | all_settings.append(data)
25 |
26 | archetypes = {setting['setting_name']: {'mood':setting['mood'],
27 | 'warn_assistant':setting['warn_assistant'],
28 | 'starting_conversation':pd.DataFrame(setting['starting_conversation'])}
29 | for setting in all_settings}
30 |
31 | default_setting = 'Strictly Factual'
32 | default_setting_index = list(archetypes.keys()).index(default_setting)
33 |
34 | return archetypes, default_setting_index
35 |
36 |
37 | def load_conversation(starting_conversation):
38 | # load conversation if it is not loaded or if the initial conversation has changed.
39 | #if ('conversation' not in st.session_state
40 | #or starting_conversation['text'].to_list() !=
41 | #st.session_state['conversation']['text'].iloc[:len(starting_conversation.index)].to_list()):
42 | if 'conversation' not in st.session_state:
43 | st.session_state['conversation'] = starting_conversation
44 |
45 |
46 | def display_chat_history(starting_conversation):
47 | chat_so_far = ''
48 | for i, text in enumerate(st.session_state['conversation']['text']):
49 | chat_so_far += text + '\n'
50 | if i < len(starting_conversation): continue
51 | if text[:4] == 'User':
52 | text = '👤' + text[:-13]
53 | else:
54 | text = '🖥️' + markdown_litteral(text[:-13])
55 | st.write(text)
56 | st.markdown('---')
57 |
58 |
59 | def add_conversation_entry(new_entry):
60 | text_length = len(new_entry)
61 | data = pd.DataFrame({'text': new_entry, 'text_length': text_length}, index=[0])
62 | data['ada_search'] = data['text'].apply(lambda x: create_embedding(x))
63 | st.session_state['conversation'] = pd.concat([st.session_state['conversation'], data],
64 | ignore_index=True)
65 |
66 |
67 | def create_prompt(settings,
68 | user_chat_text,
69 | similar_google_results,
70 | similar_conversation,
71 | current_time,
72 | current_date_and_time):
73 |
74 | mood = settings['archetype']['mood']
75 | warn_assistant = settings['archetype']['warn_assistant']
76 |
77 | prompt = mood + current_date_and_time
78 | if similar_google_results.empty:
79 | prompt += "The user did not make a google search to provide more information.\n"
80 | else:
81 | prompt += "The user provided you with google searches and your findings from different \
82 | sources are: \n" + '\n'.join(similar_google_results['text'].to_list()) + "\n"
83 | prompt += 'These are the relevant entries from the conversation so far (in order of importance):\n' + \
84 | '\n'.join(similar_conversation['text'].to_list()) + '\nThese are the last two messages:\n\
85 | ' + st.session_state['conversation']['text'].iloc[-1] + warn_assistant #'\nUser: \
86 | #' + user_chat_text + f' ({current_time})\n' + warn_assistant + '\nAssistant:'
87 |
88 | prompt_model = [
89 | {'role': 'system', 'content': prompt},
90 | {'role': 'user', 'content': user_chat_text}
91 | ]
92 |
93 | return prompt, prompt_model
94 |
95 | def display_assistant_response(similar_google_results, prompt, answer):
96 | st.markdown('---')
97 | st.write('🖥️Assistant: ' + markdown_litteral(answer))
98 | with st.expander("What sources did I use to make this answer?"):
99 |         for _, row in similar_google_results.iterrows():
100 |             st.write(markdown_litteral(row['text']) + f" [Source]({row['link']})")
101 | with st.expander("Prompt used:"):
102 | st.write(markdown_litteral(prompt).replace('\n',' \n \n'))
103 | st.markdown(':green[Tokens used: ]' + f':green[{str(num_of_tokens(prompt))}]')
104 |
105 |
106 | def assistant_settings(chat_submitted, col2):
107 | settings = {}
108 | if 'answer_with_search' not in st.session_state['settings']:
109 | st.session_state['settings']['answer_with_search'] = True
110 | settings['answer_with_search'] = col2.checkbox('Search internet to answer',
111 | value=True,
112 | help="When checked, the Assistant will make a new \
113 | search using your question as the query. If you \
114 | disable this, the Assistant will use only the \
115 | search history.")
116 | with st.expander("Assistant settings"):
117 | col1, col2 = st.columns(2)
118 | archetypes, default_setting_index = load_assistant_settings()
119 | archetype = col1.selectbox('Archetype',
120 | archetypes.keys(),
121 | help='Determines how the assistant will behave \
122 | (Custom archetypes can be created in the \
123 | "Create your Assistant" tab).',
124 | index=default_setting_index)
125 |
126 | if 'num_of_excerpts' not in st.session_state['settings']:
127 | st.session_state['settings']['num_of_excerpts'] = 5
128 | st.session_state['settings']['consult_search_history'] = True
129 | st.session_state['settings']['specify_sources'] = ''
130 | st.session_state['settings']['temperature'] = 1.0
131 |
132 | settings['temperature'] = col2.slider('Temperature',
133 | min_value=0.0,max_value=1.0,value=1.0,step=0.01,
134 | help="Determine how random the Assistant responses are \
135 | lower numbers mean more deterministic answers \
136 | higher values mean more random.")
137 |
138 | settings['specify_sources'] = st.text_input("Specify links",
139 | help="This field allows you to specify urls \
140 | for the Assistant to source from. \
141 | Separate each link with a comma \
142 | and space `, `.",
143 | value='')
144 | with col2.container():
145 | add_vertical_space(1)
146 |
147 | settings['consult_search_history'] = col2.checkbox('Consult search history',
148 | value=True,
149 | help="When checked, the Assistant will look into \
150 | the search history to find relevant excerpts.")
151 |
152 | settings['num_of_excerpts'] = col1.number_input('How many excerpts to use',
153 | min_value=1,
154 | value=5,
155 | help='This indicates how many \
156 | pieces of texts from searches \
157 | to use in the prompt')
158 |
159 | if chat_submitted:
160 | settings['archetype'] = archetypes[archetype]
161 | st.session_state['settings'] = settings
162 |
163 |
164 | return settings
165 |
166 |
167 | def submit_user_message(settings, user_chat_text, chat_submitted):
168 | if not chat_submitted or user_chat_text == '': return
169 |
170 | # Show user message
171 | st.write('👤User: ' + user_chat_text)
172 |
173 | # Find relevant search results and conversation entries to craft the AI prompt
174 | similar_google_results = get_info_from_internet(user_chat_text, settings)
175 | with st.spinner('Sending message...'):
176 | similar_conversation = find_top_similar_results(st.session_state['conversation'],
177 | user_chat_text, 4)
178 |
179 | # Knowing the current time and date may be important for interpreting news articles.
180 | date = datetime.now()
181 | current_time = f'{date.strftime("%I:%M:%S %p")}'
182 | current_date_and_time = f'Current time is {date.strftime("%I:%M %p %A %B %d %Y")}.\n'
183 |
184 | prompt_text, prompt_model = create_prompt(
185 | settings,
186 | user_chat_text,
187 | similar_google_results,
188 | similar_conversation,
189 | current_time,
190 | current_date_and_time
191 | )
192 |
193 | tokens = num_of_tokens(prompt_text)
194 |
195 | # Send prompt to the AI and record it to chat history
196 | with st.spinner('Generating response...'):
197 | answer = gpt3_call(prompt_model,
198 | tokens=3000 - tokens,
199 | temperature=settings['temperature'],
200 | stop='User:')
201 | answer = remove_timestamp(answer)
202 | add_conversation_entry('User: ' + user_chat_text + f' ({current_time})')
203 | current_time = f'{date.strftime("%I:%M:%S %p")}'
204 | add_conversation_entry('Assistant: ' + answer + f' ({current_time})')
205 |
206 | display_assistant_response(similar_google_results, prompt_text, answer)
207 |
208 |
209 | def add_searches(settings):
210 | with st.expander("Add searches"):
211 | num_of_queries = st.number_input("Number of additional searches", min_value=0, value=1)
212 |
213 | settings['additional_searches'] = []
214 | for i in range(num_of_queries):
215 | search = st.text_input("Search query", key=i)
216 | if search != '':
217 | settings['additional_searches'].append(search)
218 |
219 | return settings
220 |
221 |
222 | def get_info_from_internet(user_chat_text, settings):
223 | answer_with_search = settings['answer_with_search']
224 | additional_searches = settings['additional_searches']
225 | specify_sources = settings['specify_sources'].split(', ')
226 | consult_search_history = settings['consult_search_history']
227 | num_of_excerpts = settings['num_of_excerpts']
228 |
229 | history = st.session_state['google_history']
230 |
231 | sources_content = pd.DataFrame()
232 | if specify_sources != ['']:
233 | sources_content = search_new_links(user_chat_text, specify_sources, history, sources_content)
234 |
235 | if additional_searches != []:
236 | sources_content = search_new_queries(additional_searches, history, sources_content)
237 |
238 | if answer_with_search:
239 | sources_content = search_new_queries([user_chat_text], history, sources_content)
240 |
241 | if not consult_search_history:
242 | if sources_content.empty: return pd.DataFrame()
243 | return find_top_similar_results(sources_content, user_chat_text, num_of_excerpts)
244 |
245 | all_results = st.session_state['google_history']
246 | all_results = pd.concat([all_results, sources_content])
247 |
248 | return find_top_similar_results(all_results, user_chat_text, num_of_excerpts)
249 |
250 | def search_new_links(user_chat_text, specify_sources, history, sources_content):
251 | already_seen_results = history[history['link'].isin(specify_sources)]
252 | links_not_in_history = [value for value in specify_sources if value not in history['link'].values]
253 | #print()
254 | if all_are_valid_links(links_not_in_history):
255 | sources_content = page_search(user_chat_text,len(links_not_in_history),links_not_in_history)
256 | update_history(sources_content)
257 | sources_content = pd.concat([sources_content, already_seen_results])
258 | return sources_content
259 |
260 | def search_new_queries(additional_searches, history, sources_content):
261 | already_seen_results = history[history['query'].isin(additional_searches)]
262 | query_not_in_history = [value for value in additional_searches if value not in history['query'].values]
263 | sources_content = pd.concat([sources_content, already_seen_results])
264 | queries_results = pd.DataFrame()
265 | for search in query_not_in_history:
266 | query_results = ddg_search(search, 3)
267 | queries_results = pd.concat([queries_results,query_results])
268 | update_history(queries_results)
269 | sources_content = pd.concat([sources_content, queries_results])
270 | return sources_content
271 |
272 | def remove_timestamp(string):
273 |     # Strip a trailing "(HH:MM:SS AM/PM)" timestamp and any surrounding whitespace
274 |     pattern = re.compile(r'\s*\(\d{2}:\d{2}:\d{2} [AP]M\)\s*$')
275 |     return pattern.sub('', string)
--------------------------------------------------------------------------------
/auth.py:
--------------------------------------------------------------------------------
1 | # Credit https://github.com/Sven-Bo/streamlit-sales-dashboard-with-userauthentication-database
2 |
3 | import streamlit as st
4 | import database as db
5 | from datetime import datetime, timedelta
6 | import streamlit_authenticator as stauth
7 | from utils import tell_to_reload_page
8 |
9 |
10 | class LoginSignup(stauth.Authenticate):
11 | def login(self, form_name):
12 |
13 | if not st.session_state['authentication_status']:
14 | self.token = self.cookie_manager.get(self.cookie_name)
15 | if self.token is not None:
16 | self.token = self.token_decode()
17 | if self.token is not False:
18 | if not st.session_state['logout']:
19 | if self.token['exp_date'] > datetime.utcnow().timestamp():
20 |                             if 'name' in self.token and 'username' in self.token:
21 | st.session_state['name'] = self.token['name']
22 | st.session_state['username'] = self.token['username']
23 | st.session_state['authentication_status'] = True
24 |
25 | if st.session_state['authentication_status'] != True:
26 | login, signup = st.tabs(['Login', 'Signup'])
27 |
28 | with login:
29 | login_form = st.form('Login')
30 | login_form.subheader(form_name)
31 | self.username = login_form.text_input('Username')
32 | st.session_state['username'] = self.username
33 | self.password = login_form.text_input('Password', type='password')
34 | remember = st.checkbox("Remember me")
35 |
36 | if login_form.form_submit_button('Login'):
37 | if remember: self.cookie_expiry_days = 30
38 | self.index = None
39 | for i in range(0, len(self.usernames)):
40 | if self.usernames[i] == self.username:
41 | self.index = i
42 | if self.index is not None:
43 | try:
44 | if self.check_pw():
45 | st.session_state['name'] = self.names[self.index]
46 | self.exp_date = self.exp_date()
47 | self.token = self.token_encode()
48 | self.cookie_manager.set(self.cookie_name, self.token,
49 | expires_at=datetime.now() + timedelta(days=self.cookie_expiry_days))
50 | st.session_state['authentication_status'] = True
51 | else:
52 | st.session_state['authentication_status'] = False
53 | except Exception as e:
54 | print(e)
55 | else:
56 | st.session_state['authentication_status'] = False
57 |
58 | with signup:
59 | with st.form('Signup'):
60 | st.subheader('Signup')
61 | name = st.text_input('Name')
62 | username = st.text_input('Username')
63 | password = st.text_input('Password', type='password')
64 | confirm_password = st.text_input('Confirm password', type='password')
65 | signup_button = st.form_submit_button('Signup')
66 |
67 | if signup_button:
68 | if username in self.usernames:
69 |                             st.warning('This username already exists. Please choose a unique username.')
70 | st.stop()
71 |
72 | if name == '' or username == '' or password == '':
73 | st.warning('Please fill all fields')
74 | st.stop()
75 |
76 | if username == '__removed__':
77 | st.warning('Invalid username')
78 | st.stop()
79 |
80 | if password != confirm_password:
81 | st.warning("Password does not match 'Confirm password' field.")
82 | st.stop()
83 |
84 | hashed_password = stauth.Hasher([password]).hash(password)
85 |
86 | db.insert_user(username, name, hashed_password)
87 |
88 | st.success('Successful account creation. You may now use the Login tab.', icon='✅')
89 |
90 | return st.session_state['name'], st.session_state['authentication_status'], st.session_state['username']
91 |
92 | def logout_function(self):
93 | self.cookie_manager.delete(self.cookie_name)
94 | st.session_state['logout'] = True
95 | st.session_state['name'] = None
96 | st.session_state['username'] = None
97 | st.session_state['authentication_status'] = None
98 |
99 | if 'api_key' in st.session_state:
100 | st.session_state.pop('api_key')
101 | if 'conversation' in st.session_state:
102 | st.session_state.pop('conversation')
103 | if 'google_history' in st.session_state:
104 | st.session_state.pop('google_history')
105 | if 'settings' in st.session_state:
106 | st.session_state.pop('settings')
107 |
108 |
109 | def logout_button(self, button_name):
110 | if st.button(button_name):
111 | self.logout_function()
112 | st.stop()
113 |
114 | def authenticate_user():
115 | users = db.fetch_all_users()
116 |
117 | usernames = [user["key"] for user in users]
118 | names = [user["name"] for user in users]
119 | hashed_passwords = [user["password"] for user in users]
120 |
121 | authenticator = LoginSignup(names, usernames, hashed_passwords,
122 | "query_aichat", "abcdef", cookie_expiry_days=0)
123 |
124 | st.session_state['authenticator'] = authenticator
125 |
126 | (st.session_state['name'],
127 | authentication_status,
128 | st.session_state['username']) = authenticator.login("Login")
129 |
130 |     if authentication_status is None:
131 |         st.stop()
132 |     if authentication_status is False:
133 |         st.warning("Username/password is incorrect")
134 |         st.stop()
135 |
136 | def logout_button():
137 | st.session_state['authenticator'].logout_button("Logout")
--------------------------------------------------------------------------------
/conversation_settings/_create_setting.py:
--------------------------------------------------------------------------------
1 | # To create a new setting, simply change setting_name, mood, warn_assistant, and
2 | # starting_conversation. Then run this script.
3 |
4 | setting_name = 'Coding buddy'
5 | mood = "You are a friendly and helpful AI assistant. You don't have access to the internet beyond the google searches that the user provides but you are very knowledgeable about how to code."
6 | warn_assistant = "ATTENTION: If the user asks a question about coding, try to answer the question giving coding examples and explaining the code. DO NOT provide any hyperlinks and answer using the markdown formatting."
7 | starting_conversation = ['User: Who are you?',
8 | 'Assistant: Hello, my name is Assistant. How can I help you?',
9 | "User: Can you show me how to loop between 0 and 9 in python?",
10 | "Assistant: Sure. Here's how you can loop between 0 and 9 in python:\n```python\nfor i in range(10):\n print(i)\n```",
11 | "User: How to concatenate two dataframes?",
12 | "Assistant: To concatenate two dataframes with the same columns you need to use the pandas.concat() method. Here's an example of how to use it:\n```python\nimport pandas as pd\n\n# create two dataframes with same columns\ndf1 = pd.DataFrame({'Id': [1, 2, 3], 'Name': ['John', 'Sam', 'Alice']})\ndf2 = pd.DataFrame({'Id': [4, 5, 6], 'Name': ['Dat', 'Kim', 'Jill']})\n\n# concatenate if value of column 'Id' is same\ndf3 = pd.concat([df1, df2], axis=0, ignore_index=True, sort=False)\n\n# view result\ndf3\n```\nThe output should look like this:\n| Id | Name |\n| --- | --- |\n| 1 | John |\n| 2 | Sam|\n| 3 | Alice |\n| 4 | Dat |\n| 5 | Kim|\n| 6 | Jill |\n*Note: This is just an example and there might be more sophisticated techniques to concatenate dataframes, depending on your problem."]
13 |
14 | import pandas as pd
15 | from openai.embeddings_utils import get_embedding, cosine_similarity
16 | import openai
17 | import json
18 | import os
19 |
20 | # Navigate to the parent directory
21 | parent_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
22 | with open(os.path.join(parent_dir, 'api_key.txt'), 'r') as f:
23 |     openai.api_key = f.read().strip()  # Strip any trailing newline from the key file
24 |
25 | text_length = [len(x) for x in starting_conversation]
26 | data = pd.DataFrame({'text': starting_conversation, 'text_length': text_length})
27 | print('Creating embeddings...')
28 | data['ada_search'] = data['text'].apply(lambda x: get_embedding(x, engine='text-embedding-ada-002'))
29 |
30 | dictionary = {
31 | "setting_name": setting_name,
32 | "mood": mood + '\n',
33 | "warn_assistant": '\n' + warn_assistant + '\n',
34 | "starting_conversation": data.to_dict()
35 | }
36 |
37 | # Serializing json
38 | json_object = json.dumps(dictionary, indent=4)
39 |
40 | # Writing to file
41 | file_name = setting_name.replace(' ', '_') + ".json"
42 | with open(file_name, "w") as outfile:
43 | outfile.write(json_object)
44 |
45 | print('Setting successfully created. You may close this window')
--------------------------------------------------------------------------------
/conversation_settings/_create_setting_file.bat:
--------------------------------------------------------------------------------
1 | @echo off
2 |
3 |
4 | cd ..
5 | start cmd /k "cd venv\Scripts\ && activate.bat && cd ..\.. && cd conversation_settings && python _create_setting.py"
--------------------------------------------------------------------------------
/conversation_settings/_create_setting_file.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | cd ..
4 | source venv/bin/activate
5 | python3 conversation_settings/_create_setting.py
--------------------------------------------------------------------------------
/database.py:
--------------------------------------------------------------------------------
1 | # Credit https://github.com/Sven-Bo/streamlit-sales-dashboard-with-userauthentication-database
2 |
3 | from cryptography.fernet import Fernet
4 | import streamlit as st
5 | import pandas as pd
6 | import datetime
7 | from deta import Deta # pip install deta
8 | from utils import tell_to_reload_page
9 |
10 |
11 | #DETA_KEY = st.secrets["DETA_KEY"]
12 | #KEY_MAP = bytes(st.secrets['KEY_MAP'], "utf-8")
13 |
14 | #deta = Deta(DETA_KEY)
15 | #cipher = Fernet(KEY_MAP)
16 |
17 |
18 | #def encrypt(data):
19 | # if type(data) != bytes: data = string_to_bytes(data)
20 | # return bytes_to_string(cipher.encrypt(data))
21 |
22 |
23 | #def decrypt(data):
24 | # if type(data) != bytes: data = string_to_bytes(data)
25 | # return bytes_to_string(cipher.decrypt(data))
26 |
27 |
28 | def string_to_bytes(string: str):
29 | return string.encode('utf-8')
30 |
31 |
32 | def bytes_to_string(bytes: bytes):
33 | return bytes.decode("utf-8")
34 |
35 |
36 | #db_login = deta.Base("quest_users")
37 | #db_api_key = deta.Base("quest_api_key")
38 | #db_search_history = deta.Base("quest_internet_search")
39 | #db_user_settings = deta.Base("quest_user_settings")
40 |
41 |
42 | def insert_user(username, name, password):
43 |     """Returns the user on a successful user creation, otherwise raises an error"""
44 | #return db_login.put({"key": username, "name": name, "password": password})
45 |
46 |
47 | def fetch_all_users():
48 |     """Returns a list of all users"""
49 | #res = db_login.fetch()
50 | #return res.items
51 |
52 | def get_user(username):
53 | """If not found, the function will return None"""
54 | #return db_login.get(username)
55 |
56 |
57 | def update_user(username, updates):
58 | """If the item is updated, returns None. Otherwise, an exception is raised"""
59 | #return db_login.update(updates, username)
60 |
61 |
62 | def delete_user_login(username):
63 | """Always returns None, even if the key does not exist"""
64 | st.session_state['authenticator'].logout_function()
65 | #db_login.delete(username)
66 | #tell_to_reload_page()
67 |
68 |
69 | def insert_api_key(username, api_key):
70 | now = datetime.datetime.now()
71 |     thirty_days_from_now = now + datetime.timedelta(days=30)
72 | #try: # If key exists in the database
73 | #get_api_key(username)
74 | #return db_api_key.update({'api_key': encrypt(api_key)}, username, expire_at=thirty_days_from_now)
75 | #except TypeError: # If key doesn't exist in the database
76 | #return db_api_key.put({'key': username, 'api_key': encrypt(api_key)}, expire_at=thirty_days_from_now)
77 |
78 |
79 | def get_api_key(username):
80 | #encrypted_key = db_api_key.get(username)['api_key']
81 | #return decrypt(encrypted_key)
82 |     raise NotImplementedError('Database access is currently disabled')
83 |
84 |
85 | def delete_api_key(username):
86 | #return db_api_key.delete(username)
87 | pass
88 |
89 |
90 | def insert_search_history(search_entry):
91 | #return db_search_history.put_many(search_entry)
92 | pass
93 |
94 |
95 | def get_user_search_history(username):
96 | #if username == '__removed__': return []
97 | #entries = db_search_history.fetch({'username': username}).items
98 | #return entries
99 |     raise NotImplementedError('Database access is currently disabled')
100 |
101 |
102 | def delete_search_history(username):
103 | #user_history = db_search_history.fetch({'username': username}).items
104 | #user_history = pd.DataFrame(user_history)
105 | #if user_history.empty: return
106 | #with st.spinner("Deleting search history."):
107 | #for key in user_history['key']:
108 | #db_search_history.update({'username': '__removed__'}, key=key)
109 | #st.session_state.pop('google_history', None)
110 | pass
111 |
112 |
113 | def delete_user_data(username):
114 | delete_search_history(username)
115 | delete_api_key(username)
116 | delete_user_login(username)
117 |
118 | def delete_user_button():
119 | with st.form("Delete all user data."):
120 | st.write('Delete all user data')
121 | st.warning("This will permanently delete all of your data \
122 | with no chance for recovery.")
123 | confirmation = st.text_input('Type "delete me"')
124 | submit_button = st.form_submit_button("Submit")
125 | if submit_button:
126 | if confirmation == 'delete me':
127 | delete_user_data(st.session_state['username'])
--------------------------------------------------------------------------------
/gpt_api.py:
--------------------------------------------------------------------------------
1 | from openai.embeddings_utils import get_embedding, cosine_similarity
2 | import pandas as pd
3 | from utils import api_error_warning
4 | import openai
5 | import streamlit as st
6 |
7 |
8 | def find_top_similar_results(df: pd.DataFrame, query: str, n: int):
9 | if len(df.index) < n:
10 | n = len(df.index)
11 | embedding = create_embedding(query)
12 | df1 = df.copy()
13 | df1["similarities"] = df1["ada_search"].apply(lambda x: cosine_similarity(x, embedding))
14 | best_results = df1.sort_values("similarities", ascending=False).head(n)
15 | return best_results.drop(['similarities', 'ada_search'], axis=1).drop_duplicates(subset=['text'])
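# Illustrative usage (hypothetical variable names): `df` must carry a 'text'
# column and an 'ada_search' column of embedding vectors, as produced by
# ddg_search() or page_search() in internet_search.py:
#     top = find_top_similar_results(results_df, 'climate policy', n=5)
#     top['text']  # the n snippets most semantically similar to the query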
16 |
17 |
18 | def create_embedding(query):
19 | query = query.encode(encoding='ASCII', errors='ignore').decode()
20 |     # Request the embedding, warning the user and halting on API errors
21 |     try:
22 |         return get_embedding(query, engine="text-embedding-ada-002")
23 |     except Exception:
24 |         api_error_warning()
25 |         st.stop()
26 |
27 | def test_api_key(api_key):
28 | openai.api_key = api_key
29 |     with st.spinner("Validating API key..."):
30 | try:
31 | get_embedding('a', engine="text-embedding-ada-002")
32 | except:
33 | api_error_warning()
34 | if 'api_key' in st.session_state:
35 | st.session_state.pop('api_key')
36 | st.stop()
37 |
38 |
39 | def gpt3_call(prompt, tokens: int, temperature: float = 1, stop=None):
40 | try:
41 | response = openai.ChatCompletion.create(
42 | model="gpt-3.5-turbo",
43 | messages=prompt,
44 | max_tokens=tokens,
45 | stop=stop,
46 | temperature=temperature)
47 |
48 | return response["choices"][0]['message']["content"].replace('\n', ' \n')
49 | except Exception as e:
50 | print(e)
51 | api_error_warning()
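# Illustrative usage of gpt3_call (the message list follows the standard
# ChatCompletion schema; the values below are made-up examples):
#     prompt = [{"role": "system", "content": "You are a helpful assistant."},
#               {"role": "user", "content": "Summarise these findings."}]
#     reply = gpt3_call(prompt, tokens=200, temperature=0.7)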
--------------------------------------------------------------------------------
/internet_search.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | import bs4
3 | import io
4 | import re
5 | import PyPDF2
6 | import requests
7 | from logging import warning
8 | import threading
9 | import pandas as pd
10 | import database as db
11 | from duckduckgo_search import ddg
12 | from gpt_api import create_embedding, find_top_similar_results
13 | from utils import markdown_litteral, separate_list
14 |
15 |
16 | def ddg_search(query: str, numResults: int, region: str=None, time_period=None):
17 | try:
18 | results = ddg(query, region, 'on', time_period, numResults)
19 | except Exception as e:
20 | print(e)
21 | return failed_ddg_search(query)
22 |
23 |     if results is None:
24 | return failed_ddg_search(query)
25 |
26 | st.write(results)
27 | results = pd.DataFrame(results)
28 | results.columns = ['title', 'link', 'text']
29 | results['query'] = [query for _ in results.index]
30 | results['text_length'] = results['text'].str.len()
31 | results['ada_search'] = results['text'].apply(lambda x: create_embedding(x))
32 | return results
33 |
34 |
35 | def failed_ddg_search(query: str):
36 |     st.warning(f'Could not find any internet results for the following query: \n \n\
37 | {query} \n \nTo avoid seeing this error, disable the "Search internet to answer" option \
38 | whenever you are not asking something that you would ask a search engine.', icon='😵')
39 | results = pd.DataFrame(columns=['title', 'link', 'text', 'query', 'text_length', 'ada_search'])
40 | return results
41 |
42 |
43 | def google_search(search: str, search_depth: int):
44 | # Make a search request
45 | try:
46 | res = requests.get('https://google.com/search?q=' + search)
47 |         res.raise_for_status() # Raise if an HTTPError occurred
48 | except:
49 |         warning("There was a problem with this service's internet")
50 |         st.warning("There was a problem with this service's internet.😵 \n\
51 |         If you got HTTPError: 429, that means this service's IP \
52 |         is being rate limited. If you experience this, \
53 |         please report the issue at https://github.com/farrael004/Quest/issues. \
54 |         \n \nYou can try again by refreshing the page.")
55 | raise
56 |
57 | links = find_links_from_search(res)
58 | largest_results = page_search(search, search_depth, links)
59 | return largest_results
60 |
61 |
62 | def find_links_from_search(res):
63 | # Extract link results from the search request
64 | with st.spinner("Getting search results..."):
65 | soup = bs4.BeautifulSoup(res.text, 'html.parser')
66 | link_elements = soup.select('a')
67 | links = [link.get('href').split('&sa=U&ved=')[0].replace('/url?q=', '')
68 | for link in link_elements
69 | if '/url?q=' in link.get('href') and
70 | 'accounts.google.com' not in link.get('href') and
71 | 'support.google.com' not in link.get('href')]
72 |     return list(dict.fromkeys(links)) # Remove duplicates while preserving order
73 |
74 | def page_search(search, search_depth, links):
75 | with st.spinner(text="Searching the internet..."):
76 | # Explore the links
77 | links_attempted = -1
78 | links_explored = 0
79 | search_results = pd.DataFrame(columns=['text', 'link', 'query'])
80 | link_history = st.session_state['google_history']['link'].unique().tolist()
81 |         while links_explored < search_depth and links_attempted < len(links) - 1:
82 | links_attempted += 1
83 | if links == []:
84 | st.warning(f"No internet results found for \"{search}\".😢 \nTry again with a different query.")
85 | st.stop()
86 | if links[links_attempted] in link_history: continue
87 | # If this link does not work, go to the next one
88 | try:
89 | res = requests.get(links[links_attempted])
90 | res.raise_for_status()
91 | except:
92 | continue
93 |
94 | # Create a table with the useful texts from the page, the page's link, and the query used
95 | useful_text = extract_useful_text(res)
96 | link_list = [links[links_attempted] for i in range(len(useful_text))] # Creates a list of the same link to match the length of useful_text
97 | query_list = [search for i in range(len(useful_text))] # Creates a list of the same query to match the length of useful_text
98 | link_results = pd.DataFrame({'text': useful_text, 'link': link_list, 'query': query_list})
99 | search_results = pd.concat([search_results, link_results])
100 | links_explored += 1
101 |
102 | # Filter for only the largest results
103 | search_results['text_length'] = search_results['text'].str.len()
104 | largest_results = search_results.nlargest(50, 'text_length')
105 | largest_results = largest_results.drop_duplicates()
106 |
107 | # Create embeddings
108 | with st.spinner('Analysing results...'):
109 | largest_results['ada_search'] = largest_results['text'].apply(lambda x: create_embedding(x))
110 | return largest_results
111 |
112 |
113 | def extract_useful_text(res):
114 | if res.headers['Content-Type'] == 'application/pdf':
115 | return extract_from_pdf(res)
116 | return extract_from_html(res)
117 |
118 |
119 | def extract_from_html(res):
120 | soup = bs4.BeautifulSoup(res.text, 'html.parser')
121 | link_text = list(set(soup.get_text().splitlines())) # Separate all text by lines and remove duplicates
122 | useful_text = [s for s in link_text if len(s) > 30] # Get only strings above 30 characters
123 | useful_text = split_paragraphs(useful_text) # If the string is too long it will split it at full stops '. '
124 | return split_paragraphs(useful_text) # Do it again just for good measure (otherwise it wouldn't work for all strings for some reason. Try searching "Who is Elon Musk" to test this issue)
125 |
126 |
127 | def extract_from_pdf(response):
128 | pdf_content = response.content
129 |     pdf_reader = PyPDF2.PdfReader(io.BytesIO(pdf_content)) # Open the PDF file using PyPDF2
130 |     # Extract the text from the PDF file
131 |     pdf_text = ''
132 |     for page in pdf_reader.pages:
133 |         pdf_text += page.extract_text()
134 |
135 | pdf_text = re.sub(r'\r\n|\r|\n', ' ', pdf_text) # Remove new lines
136 | return split_paragraphs([pdf_text], 500)
137 |
138 | def make_new_internet_search(user_query_text):
139 | google_history = get_user_search_history()
140 | query_history = google_history['query'].unique().tolist()
141 | if user_query_text not in query_history:
142 | search_results = ddg_search(user_query_text, 3)
143 | update_history(search_results)
144 | else:
145 | search_results = google_history
146 |
147 | similar_results = find_top_similar_results(search_results, user_query_text, 5)
148 | google_findings = similar_results['text'].to_list()
149 | links = similar_results['link'].to_list()
150 | return google_findings, links
151 |
152 | def split_paragraphs(paragraphs, max_length=1000):
153 | split_paragraphs = []
154 | for paragraph in paragraphs:
155 | # Split the paragraph until no parts are larger than the max length
156 | while len(paragraph) > max_length:
157 | split_index = paragraph.find('. ', max_length)
158 | # If there's no '. ' after the max length, check for the next instance of '.['
159 | if split_index == -1:
160 | split_index = paragraph.find('.[', max_length)
161 | # If there's no instance of '.[' after the max length, just split at the max length
162 | if split_index == -1:
163 | split_index = max_length
164 | split_paragraph = paragraph[:split_index]
165 | # Indicate where strings were split with '(...)'
166 | if split_paragraph.startswith('.'):
167 | split_paragraph = '(...)' + split_paragraph[1:]
168 | else:
169 | split_paragraph += '(...)'
170 | split_paragraphs.append(split_paragraph)
171 | paragraph = paragraph[split_index:]
172 | split_paragraphs.append(paragraph)
173 | return split_paragraphs
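# Example (illustrative): strings shorter than max_length pass through unchanged,
#     split_paragraphs(['short paragraph'], max_length=1000)
#     --> ['short paragraph']
# while longer strings are cut near max_length, preferring '. ' or '.['
# boundaries, with each cut point marked by '(...)'.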
174 |
175 |
176 | def load_google_history():
177 | # Try to find the search history, if it's empty or it can't find it, create a new search history
178 | try:
179 | data = db.get_user_search_history(st.session_state['username'])
180 | if data == []:
181 | return pd.DataFrame(columns=['text', 'link', 'query', 'text_length', 'ada_search'])
182 | else:
183 | return pd.DataFrame(data).drop(['key', 'username'], axis=1)
184 | except:
185 | warning('Could not fetch history from database')
186 | data = pd.DataFrame(columns=['text', 'link', 'query', 'text_length', 'ada_search'])
187 | return data
188 |
189 |
190 | def save_google_history(results):
191 | username = st.session_state['username']
192 | results['username'] = [username for i in range(len(results.index))]
193 | entries = separate_list(results.to_dict('records'), 25)
194 | for entry in entries:
195 | db.insert_search_history(entry)
196 |
197 |
198 | def save_google_history_in_thread(results):
199 |     thread = threading.Thread(target=save_google_history, args=(results,))  # args must be a tuple
200 | thread.start()
201 |
202 |
203 | def get_user_search_history():
204 | if 'google_history' not in st.session_state:
205 | st.session_state['google_history'] = load_google_history()
206 | return st.session_state['google_history']
207 |
208 |
209 | def update_history(results):
210 | #with st.spinner('Committing to memory...'):
211 | #save_google_history(results)
212 |
213 | history = st.session_state['google_history']
214 |
215 | if history.empty:
216 | history = results
217 | else:
218 | history = pd.concat([history, results]).drop_duplicates(subset=['text'])
219 |
220 | st.session_state['google_history'] = history
221 |
222 |
223 | def display_search_results(user_query_text, google_findings, links):
224 | if len(user_query_text) > 0:
225 | st.markdown('---')
226 | st.markdown(f'# {user_query_text}')
227 | for i,finding in enumerate(google_findings):
228 | st.markdown(markdown_litteral(finding) + f' [Source]({links[i]})')
229 |
230 |
231 | def all_are_valid_links(links):
232 |     for link in links:
233 |         try:
234 |             res = requests.get(link)
235 |             res.raise_for_status()
236 |         except Exception:
237 |             st.warning(f"The following link does not respond. Please check if it is correct and try again. \
238 |             \n{link}", icon="⚠️")
239 |             st.stop()
240 |     return True
241 |
242 |
243 | def delete_search_history():
244 | db.delete_search_history(st.session_state['username'])
245 |
246 |
247 | def delete_history_button():
248 | st.button('Delete search history', on_click=delete_search_history)
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | matplotlib==3.6.2
2 | plotly==5.11.0
3 | scipy==1.9.3
4 | scikit-learn==1.2.0
5 | bs4==0.0.1
6 | streamlit==1.16.0
7 | openai==0.27.0
8 | tiktoken==0.1.1
9 | streamlit_lottie==0.0.3
10 | streamlit-authenticator==0.1.5
11 | deta==1.1.0
12 | streamlit-extras==0.2.4
13 | cryptography==38.0.4
14 | PyPDF2==2.12.0
15 | duckduckgo-search==2.8.0
16 | pycryptodome
--------------------------------------------------------------------------------
/streamlit_app.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | from streamlit_extras import buy_me_a_coffee
3 | from streamlit_extras.add_vertical_space import add_vertical_space
4 | from streamlit_extras.badges import badge
5 | from streamlit_lottie import st_lottie
6 | import openai
7 | from utils import load_lottie_url
8 | from api_key import load_api_key, reset_key_button
9 | from internet_search import *
10 | from assistant import *
11 | from gpt_api import find_top_similar_results
12 | from auth import authenticate_user, logout_button
13 | from database import delete_user_button
14 |
15 | st.set_page_config(page_title='Quest🔍')
16 | st.title("Quest🔍")
17 | st.markdown('Tired of sifting through search results to find the \
18 | information you need? The Assistant can take care of it for you! \
19 | This open source AI-powered personal assistant can access the internet, \
20 | providing both quick and accurate answers to your questions.')
21 |
22 | # Create Sidebar
23 | with st.sidebar:
24 | lottie_image1 = load_lottie_url('https://assets10.lottiefiles.com/packages/lf20_ofa3xwo7.json')
25 | st_lottie(lottie_image1)
26 |
27 | #authenticate_user()
28 |
29 | openai.api_key = load_api_key()
30 |
31 | if 'settings' not in st.session_state:
32 | st.session_state['settings'] = {}
33 |
34 | # App layout
35 | tab1, tab2, tab3, tab4 = st.tabs(["Have a conversation", "Internet search", "Create your Assistant", "Settings"])
36 |
37 | # Have a conversation tab
38 | with tab1:
39 | response = st.container()
40 | chat = st.container()
41 |
42 | # Internet search tab
43 | with tab2:
44 |     st.markdown("\
45 |     Tell the Assistant what to research.", unsafe_allow_html=True)
46 | st.markdown("This tab allows you to give information from across the internet to the Assistant AI. \
47 | Once you've told it all the topics to search for, you can have a conversation with it in the \
48 | 'Have a conversation' tab.")
49 | with st.spinner("Getting search history..."):
50 | google_history = get_user_search_history()
51 | unique_searches = google_history['query'].unique().tolist()
52 | unique_searches.insert(0,'')
53 | initial_search = st.selectbox('Search history', unique_searches, index=0)
54 | search = st.container()
55 |
56 | with tab3:
57 |     st.write("Coming soon...", unsafe_allow_html=True)
58 |     st.write("On this page you will be able to create custom Assistant archetypes.")
59 |
60 | with tab4:
61 | #logout_button()
62 | reset_key_button()
63 | #delete_history_button()
64 | #delete_user_button()
65 |
66 | # Google search section
67 | with search:
68 | with st.form('Google'):
69 | user_query_text = st.text_input(label='Google search',value=initial_search, help="This tab \
70 | allows you to give information from across the internet to the Assistant AI. Once you've \
71 | told it all the topics to search for, you can have a conversation with it in the \
72 | 'Have a conversation' tab.")
73 | google_submitted = st.form_submit_button("Submit")
74 |
75 | # If the user pressed submit to make a new search or selected an existing one from history
76 | if (google_submitted and user_query_text != '') or initial_search != '':
77 | google_findings, links = make_new_internet_search(user_query_text)
78 |
79 | display_search_results(user_query_text, google_findings, links)
80 |
81 | # Section where user inputs directly to GPT
82 | with chat:
83 | with st.form('Chat'):
84 | user_chat_text = st.text_area(label="Ask the Assistant")
85 | col1, col2 = st.columns(2)
86 | chat_submitted = col1.form_submit_button("Submit")
87 | settings = assistant_settings(chat_submitted, col2)
88 | add_searches(settings)
89 |
90 |
91 | # User input is used here to process and display GPT's response
92 | with response:
93 | if 'archetype' not in settings:
94 | archetypes, default_setting_index = load_assistant_settings()
95 | default_setting = list(archetypes.keys())[default_setting_index]
96 | settings['archetype'] = archetypes[default_setting]
97 | starting_conversation = settings['archetype']['starting_conversation']
98 | load_conversation(starting_conversation)
99 | display_chat_history(starting_conversation)
100 | if chat_submitted:
101 | submit_user_message(settings, user_chat_text, chat_submitted)
102 |
103 | add_vertical_space(4)
104 |
105 | col1, col2, col3 = st.columns(3)
106 | with col1:
107 | buy_me_a_coffee.button('farrael004', floating=False)
108 | with col2:
109 | st.markdown("By [Rafael Moraes](https://github.com/farrael004)")
110 | badge(type="github", name="farrael004/Quest")
111 | with col3:
112 | st.container()
113 |
114 |
--------------------------------------------------------------------------------
/tutorial/Tutorial1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/farrael004/Quest/86093e81702be77c73cbdb1881a6fce5c27680a3/tutorial/Tutorial1.png
--------------------------------------------------------------------------------
/tutorial/Tutorial2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/farrael004/Quest/86093e81702be77c73cbdb1881a6fce5c27680a3/tutorial/Tutorial2.png
--------------------------------------------------------------------------------
/tutorial/Tutorial3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/farrael004/Quest/86093e81702be77c73cbdb1881a6fce5c27680a3/tutorial/Tutorial3.png
--------------------------------------------------------------------------------
/tutorial/Tutorial4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/farrael004/Quest/86093e81702be77c73cbdb1881a6fce5c27680a3/tutorial/Tutorial4.png
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | from tiktoken import get_encoding
2 | import streamlit as st
3 | import requests
4 | from logging import warning
5 | from itertools import zip_longest
6 |
7 | tokenizer = get_encoding("gpt2")
8 | def num_of_tokens(prompt: str):
9 | return len(tokenizer.encode(prompt))
10 |
11 | def markdown_litteral(string: str):
12 |     return string.replace('$', '\\$')  # Escape dollar signs so Streamlit markdown does not render them as LaTeX
13 |
14 | @st.cache
15 | def load_lottie_url(url: str):
16 | try:
17 | r = requests.get(url)
18 | if r.status_code != 200:
19 | return None
20 | return r.json()
21 | except:
22 | warning(f'Could not find lottie from url {url}.')
23 | return None
24 |
25 | def api_error_warning():
26 |     st.warning("Something went wrong. \n \n> An error occurred when trying to send your request to OpenAI. There are a few reasons why this could happen: \n> - You exceeded your rate limit. Make sure you are not using free credits, as the rate limit on a free account does not permit using this app. \n> - This service cannot communicate with OpenAI's API. \n> - You exceeded your allowance. Check your usage and limits [here](https://beta.openai.com/account/usage). \n> - You entered an invalid API key. Try getting a new key [here](https://beta.openai.com/account/api-keys) and reset your API key in the settings tab.",
27 | icon='⚠️')
28 |
29 | def separate_list(iterable, n):
30 | # Collect data into fixed-length chunks or blocks
31 |     args = [iter(iterable)] * n
32 |     groups = zip_longest(*args, fillvalue=None) # e.g. 'ABCDEFG' with n=3 --> ((A,B,C), (D,E,F), (G,None,None))
33 | result = list(groups)
34 | return [list(filter(lambda x: x is not None, sublist)) for sublist in result] # Remove None
35 |
36 | def tell_to_reload_page():
37 | st.write("# 🔄Reload the page")
38 | st.write("Reload this page to apply changes")
39 | st.stop()
--------------------------------------------------------------------------------