├── .gitignore ├── LICENSE ├── README.md ├── app ├── 01_run_app.bat ├── 02_run_app.sh ├── app.py ├── cfg │ └── settings.yaml └── db │ ├── gpt3sql.sqlite │ ├── merge_db.ipynb │ ├── merge_db.py │ └── sqlite.sql ├── docs ├── 0-about.PNG ├── 1-gen-sql.png ├── 2-run-sql.PNG ├── 3-sample-data.PNG ├── 4-config.png ├── edit-fix-error.png ├── gen_code.png └── openai_models.csv ├── examples ├── codex-quickstart.ipynb ├── dev.ipynb ├── example-python.md ├── example-sql.md ├── gpt3sql-notebook.ipynb ├── launch_notebook.bat ├── prime.py ├── prime_num.ps1 └── st_app_goog_chart.py └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | **/*api_key*.yaml 2 | **/settings*Copy.yaml 3 | **/Untitled*.ipynb 4 | **/gpt3sql-*.sqlite 5 | dev/ 6 | 7 | # Byte-compiled / optimized / DLL files 8 | __pycache__/ 9 | *.py[cod] 10 | *$py.class 11 | 12 | # C extensions 13 | *.so 14 | 15 | # Distribution / packaging 16 | .Python 17 | build/ 18 | develop-eggs/ 19 | dist/ 20 | downloads/ 21 | eggs/ 22 | .eggs/ 23 | lib/ 24 | lib64/ 25 | parts/ 26 | sdist/ 27 | var/ 28 | wheels/ 29 | pip-wheel-metadata/ 30 | share/python-wheels/ 31 | *.egg-info/ 32 | .installed.cfg 33 | *.egg 34 | MANIFEST 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .nox/ 50 | .coverage 51 | .coverage.* 52 | .cache 53 | nosetests.xml 54 | coverage.xml 55 | *.cover 56 | *.py,cover 57 | .hypothesis/ 58 | .pytest_cache/ 59 | 60 | # Translations 61 | *.mo 62 | *.pot 63 | 64 | # Django stuff: 65 | *.log 66 | local_settings.py 67 | db.sqlite3 68 | db.sqlite3-journal 69 | 70 | # Flask stuff: 71 | instance/ 72 | .webassets-cache 73 | 74 | # Scrapy stuff: 75 | .scrapy 76 | 77 | # Sphinx documentation 78 | docs/_build/ 79 | 80 | # PyBuilder 81 | target/ 82 | 83 | # Jupyter Notebook 84 | .ipynb_checkpoints 85 | 86 | # IPython 87 | profile_default/ 88 | ipython_config.py 89 | 90 | # pyenv 91 | .python-version 92 | 93 | # pipenv 94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 97 | # install all needed dependencies. 98 | #Pipfile.lock 99 | 100 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 101 | __pypackages__/ 102 | 103 | # Celery stuff 104 | celerybeat-schedule 105 | celerybeat.pid 106 | 107 | # SageMath parsed files 108 | *.sage.py 109 | 110 | # Environments 111 | .env 112 | .venv 113 | env/ 114 | venv/ 115 | ENV/ 116 | env.bak/ 117 | venv.bak/ 118 | 119 | # Spyder project settings 120 | .spyderproject 121 | .spyproject 122 | 123 | # Rope project settings 124 | .ropeproject 125 | 126 | # mkdocs documentation 127 | /site 128 | 129 | # mypy 130 | .mypy_cache/ 131 | .dmypy.json 132 | dmypy.json 133 | 134 | # Pyre type checker 135 | .pyre/ 136 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # gpt3sql 2 | 3 | ## Use GPT3 to generate SQL from text 4 | 5 | Develop a streamlit app to explore GPT-3 Codex capability (https://beta.openai.com/docs/guides/code/introduction) in terms of SQL generation 6 | - Experiment with and validate GPT-3 capability; 7 | - Target SQLite database; 8 | - Use sample dataset from https://www.sqlitetutorial.net/sqlite-sample-database/; 9 | 10 | ## Get started 11 | 12 | Get your own API Key at https://beta.openai.com/, save it into a new file at `app/cfg/api_key.yaml` (make sure this file is gitignored): 13 | ``` 14 | OPENAI_API_KEY: 15 | ``` 16 | 17 | Run the following commands a shell terminal: 18 | ``` 19 | $ pip install -r requirements.txt 20 | $ cd app 21 | $ streamlit run app.py 22 | ``` 23 | 24 | ## Example 25 | 26 | Prompt submitted at [Playground](https://beta.openai.com/playground?mode=complete&model=davinci-instruct-beta) 27 | 28 | ``` 29 | """ 30 | Table customers, columns = [CustomerId, FirstName, LastName, Company, Address, City, State, Country, PostalCode, Phone, Fax, Email, SupportRepId] 31 | Create a SQLite query for all customers in Texas named Jane 32 | """ 33 | 34 | ``` 35 | 36 | 37 | The Python code: 38 | ``` 39 | import os 40 | import openai 41 | 42 | openai.api_key = os.getenv("OPENAI_API_KEY") 43 | 44 | # POST /v1/completions 45 | 46 | response = openai.Completion.create( 47 | model="davinci-instruct-beta", 48 | prompt="\"\"\"\nTable customers, columns = [CustomerId, FirstName, LastName, State]\nCreate a SQLite query for all customers in Texas named Jane\n\"\"\"\n\n\n", 49 | temperature=0, 50 | max_tokens=256, 51 | top_p=1, 52 | frequency_penalty=0, 53 | presence_penalty=0 54 | ) 55 | 56 | ``` 57 | 58 | Response: 59 | ``` 60 | print(response["choices"][0]["text"]) 61 | 62 | SELECT * FROM customers WHERE State='TX' AND FirstName='Jane' 63 | ``` 64 | ## References 65 | 66 | ### OpenAI 67 | 68 | - [OpenAI Community Forum](https://community.openai.com/) 69 | - [GPT-3: All you need to know about the AI language model](https://www.sigmoid.com/blogs/gpt-3-all-you-need-to-know-about-the-ai-language-model/) 70 | - [New Version of GPT-3 Is Much Better](https://towardsdatascience.com/the-new-version-of-gpt-3-is-much-much-better-53ac95f21cfb) 71 | - [gpt3-sandbox repo](https://github.com/shreyashankar/gpt3-sandbox) 72 | 73 | ### Streamlit 74 | 75 | - [Streamlit Community Forum](https://discuss.streamlit.io/) 76 | - [GPT-3 Demo Showcase](https://gpt3demo.com/s/streamlit-io) 77 | - https://streamlit-example-app-streamlit-codex-streamlit-app-wfi4of.streamlitapp.com/ 78 | 79 | ### SQL 80 | 81 | - [SQLTeam Forum](https://forums.sqlteam.com/) 82 | - [SQLite Tutorial](https://www.sqlitetutorial.net/) 83 | - [Automating my job by using GPT-3 to generate database-ready SQL to answer business questions](https://blog.seekwell.io/gpt3) 84 | 85 | - [Text-to-SQL Generation for Question Answering on Electronic Medical Records](https://github.com/wangpinggl/TREQS) 86 | ### Others 87 | 88 | 89 | - [Prompt Engineering: The Career of Future](https://medium.com/nerd-for-tech/prompt-engineering-the-career-of-future-2fb93f90f117) 90 | - [Are You Intelligent Enough To Become An AI Enhanced Human?](https://community.openai.com/t/are-you-intelligent-enough-to-become-an-ai-enhanced-human/22173) 91 | 92 | - [SUSTAINABLE AI: ENVIRONMENTAL IMPLICATIONS, CHALLENGES AND OPPORTUNITIES](https://proceedings.mlsys.org/paper/2022/file/ed3d2c21991e3bef5e069713af9fa6ca-Paper.pdf) 93 | 94 | ## Credits 95 | -------------------------------------------------------------------------------- /app/01_run_app.bat: -------------------------------------------------------------------------------- 1 | streamlit run app.py -------------------------------------------------------------------------------- /app/02_run_app.sh: -------------------------------------------------------------------------------- 1 | streamlit run app.py -------------------------------------------------------------------------------- /app/app.py: -------------------------------------------------------------------------------- 1 | """ 2 | Streamlit app to experiment with GPT3 models for code generation (SQL, Python) 3 | 4 | - request/response are logged into SQLite Database 5 | - SQL can be validated against sample db 6 | - Python can be validated because streamlit uses python 7 | - Javascript can be validated using Dev Console of a native browser 8 | 9 | """ 10 | __author__ = "wgong" 11 | SRC_URL = "https://github.com/wgong/gpt3sql" 12 | 13 | ##################################################### 14 | # Imports 15 | ##################################################### 16 | # generic import 17 | from datetime import datetime, date, timedelta 18 | from os.path import exists 19 | from traceback import format_exc 20 | from uuid import uuid4 21 | import sqlite3 22 | import pandas as pd 23 | import yaml 24 | from traceback import format_exc 25 | import sys 26 | from io import StringIO 27 | 28 | import streamlit as st 29 | from st_aggrid import GridOptionsBuilder, AgGrid, GridUpdateMode, DataReturnMode 30 | 31 | # import modules of this app 32 | import openai 33 | 34 | _STR_APP_NAME = "GPT-3 Codex" 35 | 36 | st.set_page_config( 37 | page_title=f'{_STR_APP_NAME}', 38 | layout="wide", 39 | initial_sidebar_state="expanded", 40 | ) 41 | 42 | CFG = dict() 43 | KEY = dict() 44 | 45 | _STR_MENU_HOME = "Welcome" 46 | _STR_MENU_SQL_GEN_RUN = "Generate/Run Code" 47 | _STR_MENU_SQL_GEN = "Generate Code" 48 | _STR_MENU_SQL_RUN = "Review/Run Code" 49 | _STR_MENU_SQLITE_SAMPLE = "Explore SQLite Sample DB" 50 | _STR_MENU_SETTINGS = "Configure Settings" 51 | _STR_MENU_NOTES = "Take Notes" 52 | 53 | STR_DOUBLE_CLICK = "Double-click to commit changes" 54 | STR_FETCH_LOG = "Get the latest log" 55 | 56 | PROMPT_DELIMITOR = '\"\"\"' 57 | PROMPT_LIST = [PROMPT_DELIMITOR, "#", "//", "/* */", "--", ""] 58 | 59 | # Aggrid options 60 | _GRID_OPTIONS = { 61 | "grid_height": 350, 62 | "return_mode_value": DataReturnMode.__members__["FILTERED"], 63 | "update_mode_value": GridUpdateMode.__members__["MODEL_CHANGED"], 64 | "fit_columns_on_grid_load": False, # False to display wide columns 65 | # "min_column_width": 50, 66 | "selection_mode": "single", # "multiple", # 67 | "allow_unsafe_jscode": True, 68 | "groupSelectsChildren": True, 69 | "groupSelectsFiltered": True, 70 | "enable_pagination": True, 71 | "paginationPageSize": 10, 72 | } 73 | 74 | TABLE_GPT3_LOG = "T_GPT3_LOG" 75 | TABLE_NOTES = "t_resource" 76 | 77 | EDITABLE_COLUMNS = { 78 | TABLE_GPT3_LOG : [], # ["comment"], 79 | } 80 | 81 | EXAMPLE_PROMPT = { 82 | "SQL" : ''' 83 | Table customers, columns = [CustomerId, FirstName, LastName, City, State, Country] 84 | Create a SQLite query for all customers in city of Cupertino, country of USA 85 | ''', 86 | "Python" : ''' 87 | # Python 3 88 | # Create a function to calculate prime numbers less than 20 89 | ''', 90 | "JavaScript" : ''' 91 | /* Create a JavaScript dictionary of 5 countries and capitals */ 92 | ''', 93 | "Explain" : ''' 94 | SELECT DISTINCT department.name 95 | FROM department 96 | JOIN employee ON department.id = employee.department_id 97 | JOIN salary_payments ON employee.id = salary_payments.employee_id 98 | WHERE salary_payments.date BETWEEN '2020-06-01' AND '2020-06-30' 99 | GROUP BY department.name 100 | HAVING COUNT(employee.id) > 10; 101 | -- Explanation of the above query in human readable format 102 | -- 103 | ''', 104 | "Ask_Anything" : ''' 105 | What is Deep Learning? 106 | ''', 107 | "Math" : ''' 108 | What is the value of e? 109 | ''', 110 | "Science" : ''' 111 | What is the theory of special relativity? 112 | ''', 113 | 114 | } 115 | 116 | 117 | ##################################################### 118 | # Helpers (prefix with underscore) 119 | ##################################################### 120 | def _remove_leading_hash(s): 121 | lines = [] 122 | for i in s.split("\n"): 123 | i = i.strip() 124 | if (len(i) > 0 and i[0] == "#"): 125 | lines.append(i[1:]) 126 | else: 127 | lines.append(i) 128 | return "\n".join(lines) 129 | 130 | def _escape_single_quote(s): 131 | return s.replace("\'", "\'\'") 132 | 133 | def _unescape_single_quote(s): 134 | return s.replace("\'\'", "\'") 135 | 136 | def _move_item_to_first(lst, item): 137 | """Move item found in a list to position 0 138 | """ 139 | try: 140 | idx = lst.index(item) 141 | except: 142 | idx = -1 143 | if idx < 1: 144 | return lst 145 | 146 | lst_new = lst.copy() 147 | lst_new.pop(idx) 148 | lst_new.insert(0, item) 149 | return lst_new 150 | 151 | class DBConn(object): 152 | def __init__(self, db_file): 153 | self.conn = sqlite3.connect(db_file) 154 | def __enter__(self): 155 | return self.conn 156 | def __exit__(self, type, value, traceback): 157 | self.conn.close() 158 | 159 | def _get_tables(): 160 | """get a list of tables from SQLite database 161 | """ 162 | with DBConn(CFG["DB_FILE"]) as _conn: 163 | sql_stmt = f''' 164 | SELECT 165 | name 166 | FROM 167 | sqlite_schema 168 | WHERE 169 | type ='table' AND 170 | name NOT LIKE 'sqlite_%'; 171 | ''' 172 | df = pd.read_sql(sql_stmt, _conn) 173 | return df["name"].to_list() 174 | 175 | def _load_settings(): 176 | global CFG,KEY 177 | with open("cfg/settings.yaml") as f: 178 | CFG = yaml.load(f.read(), Loader=yaml.SafeLoader) 179 | 180 | if exists(CFG["API_KEY_FILE"]): 181 | with open(CFG["API_KEY_FILE"]) as f: 182 | KEY = yaml.load(f.read(), Loader=yaml.SafeLoader) 183 | if "openai_mode" not in st.session_state: 184 | st.session_state["openai_mode"] = CFG["Mode"][0] 185 | if "openai_model" not in st.session_state: 186 | st.session_state["openai_model"] = CFG["Model"][0] 187 | if "openai_use_case" not in st.session_state: 188 | st.session_state["openai_use_case"] = CFG["Use_case"][0] 189 | if "openai_temperature" not in st.session_state: 190 | st.session_state["openai_temperature"] = CFG["Temperature"] 191 | if "openai_maximum_length" not in st.session_state: 192 | st.session_state["openai_maximum_length"] = CFG["Maximum_length"] 193 | if "openai_top_p" not in st.session_state: 194 | st.session_state["openai_top_p"] = CFG["Top_p"] 195 | if "openai_frequency_penalty" not in st.session_state: 196 | st.session_state["openai_frequency_penalty"] = CFG["Frequency_penalty"] 197 | if "openai_presence_penalty" not in st.session_state: 198 | st.session_state["openai_presence_penalty"] = CFG["Presence_penalty"] 199 | 200 | 201 | def _save_settings(): 202 | global CFG,KEY 203 | with open("cfg/settings.yaml", "w") as f: 204 | modes = CFG["Mode"] 205 | models = CFG["Model"] 206 | use_cases = CFG["Use_case"] 207 | CFG = { 208 | "Mode": _move_item_to_first(modes, st.session_state.get("openai_mode")), 209 | "Model": _move_item_to_first(models, st.session_state.get("openai_model")), 210 | "Use_case": _move_item_to_first(use_cases, st.session_state.get("openai_use_case")), 211 | "Temperature": st.session_state.get("openai_temperature"), 212 | "Maximum_length": st.session_state.get("openai_maximum_length"), 213 | "Top_p": st.session_state.get("openai_top_p"), 214 | "Frequency_penalty": st.session_state.get("openai_frequency_penalty"), 215 | "Presence_penalty": st.session_state.get("openai_presence_penalty"), 216 | "Input_prefix": st.session_state.get("openai_input_prefix"), 217 | "Input_suffix": st.session_state.get("openai_input_suffix"), 218 | "Output_prefix": st.session_state.get("openai_output_prefix"), 219 | "Output_suffix": st.session_state.get("openai_output_suffix"), 220 | "API_KEY_FILE": st.session_state.get("api_key_file"), 221 | "DB_FILE": st.session_state.get("sqlite_db_file"), 222 | } 223 | yaml.dump(CFG, f, default_flow_style=False) 224 | 225 | with open(CFG["API_KEY_FILE"], "w") as f: 226 | KEY = { 227 | "OPENAI_API_KEY": st.session_state.get("openai_api_key") 228 | } 229 | yaml.dump(KEY, f, default_flow_style=False) 230 | 231 | def _select_log(): 232 | with DBConn(CFG["DB_FILE"]) as _conn: 233 | sql_stmt = f""" 234 | select ts,use_case,prompt,comment,output,valid_output,settings,uuid 235 | from {TABLE_GPT3_LOG} 236 | order by ts desc ; 237 | """ 238 | return pd.read_sql(sql_stmt, _conn) 239 | 240 | def _insert_log(use_case, settings, prompt, output, comment='', valid_output=''): 241 | with DBConn(CFG["DB_FILE"]) as _conn: 242 | insert_sql = f""" 243 | insert into {TABLE_GPT3_LOG} ( 244 | uuid, ts, use_case, settings, prompt, output,comment,valid_output 245 | ) 246 | values ( 247 | '{str(uuid4())}', 248 | '{str(datetime.now())}', 249 | '{use_case}', 250 | '{_escape_single_quote(settings)}', 251 | '{_escape_single_quote(prompt)}', 252 | '{_escape_single_quote(output)}', 253 | '{_escape_single_quote(comment)}', 254 | '{_escape_single_quote(valid_output)}' 255 | ); 256 | """ 257 | print(insert_sql) 258 | cur = _conn.cursor() 259 | cur.executescript(insert_sql) 260 | _conn.commit() 261 | 262 | def _delete_log(): 263 | data = st.session_state.get("LOG_DELETE_DATA") 264 | uuid = data.get("uuid") 265 | if not uuid: 266 | return 267 | 268 | with DBConn(CFG["DB_FILE"]) as _conn: 269 | delete_sql = f""" 270 | delete from {TABLE_GPT3_LOG} 271 | where uuid = '{uuid}'; 272 | """ 273 | print(delete_sql) 274 | cur = _conn.cursor() 275 | cur.executescript(delete_sql) 276 | _conn.commit() 277 | 278 | def _update_log(): 279 | data = st.session_state.get("LOG_UPDATE_DATA") 280 | if not data or len(data) < 3: 281 | return # id,ts populated by default 282 | 283 | with DBConn(CFG["DB_FILE"]) as _conn: 284 | set_clause = [] 285 | for col,val in data.items(): 286 | if col == "uuid": continue 287 | set_clause.append(f"{col} = '{_escape_single_quote(val)}'") 288 | update_sql = f""" 289 | update {TABLE_GPT3_LOG} 290 | set {', '.join(set_clause)} 291 | where uuid = '{data.get("uuid")}'; 292 | """ 293 | print(update_sql) 294 | cur = _conn.cursor() 295 | cur.executescript(update_sql) 296 | _conn.commit() 297 | 298 | def _display_refresh_log(): 299 | c1, _, c2 = st.columns([3,2,3]) 300 | with c1: 301 | if st.button('Refresh'): 302 | pass 303 | with c2: 304 | st.info(STR_FETCH_LOG) 305 | 306 | def _display_delete_log(selected_row): 307 | data = {"uuid" : selected_row.get("uuid", "")} 308 | st.session_state.update({"LOG_DELETE_DATA": data}) 309 | c1, _, c2 = st.columns([3,2,3]) 310 | with c1: 311 | btn_delete = st.button('Delete') 312 | with c2: 313 | st.info(STR_DOUBLE_CLICK) 314 | if btn_delete: 315 | _delete_log() 316 | 317 | def _display_update_log(selected_row): 318 | st.session_state.update({"LOG_SELECTED_ROW": selected_row}) 319 | with st.form(key="update_log"): 320 | col_left,col_right = st.columns([5,5]) 321 | data = {"ts": str(datetime.now())} 322 | 323 | with col_left: 324 | 325 | ts_old = selected_row.get("ts") 326 | ts = st.text_input("ts", value=ts_old, disabled=True) 327 | 328 | use_case_old = selected_row.get("use_case") 329 | use_case = st.text_input("use_case", value=use_case_old) 330 | if use_case != use_case_old: data.update({"use_case" : use_case}) 331 | 332 | output_old = selected_row.get("output") 333 | output = st.text_area("output", value=output_old, height=100) 334 | if output != output_old: data.update({"output" : output}) 335 | # print(f"output_old = {output_old}") 336 | # print(f"output = {output}") 337 | 338 | valid_output_old = selected_row.get("valid_output") 339 | valid_output = st.text_area("valid_output", value=valid_output_old, height=50) 340 | if valid_output != valid_output_old: data.update({"valid_output" : valid_output}) 341 | with col_right: 342 | 343 | id = st.text_input("uuid", value=selected_row.get("uuid"), disabled=True) 344 | data.update({"uuid" : id}) 345 | 346 | settings_old = selected_row.get("settings") 347 | settings = st.text_input("settings", value=settings_old, disabled=True) 348 | 349 | prompt_old = selected_row.get("prompt") 350 | prompt = st.text_area("prompt", value=prompt_old, height=100, disabled=True) 351 | 352 | comment_old = selected_row.get("comment") 353 | comment = st.text_area("comment", value=comment_old, height=30) 354 | if comment != comment_old: data.update({"comment" : comment}) 355 | 356 | st.session_state.update({"LOG_UPDATE_DATA": data}) 357 | c1, _, c2 = st.columns([3,2,3]) 358 | with c1: 359 | st.form_submit_button('Update', on_click=_update_log) 360 | with c2: 361 | st.info(STR_DOUBLE_CLICK) 362 | 363 | 364 | 365 | 366 | def _display_grid_df(df, 367 | selection_mode="multiple", 368 | page_size=_GRID_OPTIONS["paginationPageSize"], 369 | grid_height=_GRID_OPTIONS["grid_height"]): 370 | """show df in a grid and return selected row 371 | """ 372 | # st.dataframe(df) 373 | gb = GridOptionsBuilder.from_dataframe(df) 374 | gb.configure_selection(selection_mode, 375 | use_checkbox=True, 376 | groupSelectsChildren=_GRID_OPTIONS["groupSelectsChildren"], 377 | groupSelectsFiltered=_GRID_OPTIONS["groupSelectsFiltered"] 378 | ) 379 | gb.configure_pagination(paginationAutoPageSize=False, 380 | paginationPageSize=page_size) 381 | gb.configure_columns(EDITABLE_COLUMNS[f"{TABLE_GPT3_LOG}"], editable=True) 382 | gb.configure_grid_options(domLayout='normal') 383 | grid_response = AgGrid( 384 | df, 385 | gridOptions=gb.build(), 386 | height=grid_height, 387 | # width='100%', 388 | data_return_mode=_GRID_OPTIONS["return_mode_value"], 389 | update_mode=_GRID_OPTIONS["update_mode_value"], 390 | fit_columns_on_grid_load=_GRID_OPTIONS["fit_columns_on_grid_load"], 391 | allow_unsafe_jscode=True, #Set it to True to allow jsfunction to be injected 392 | ) 393 | 394 | return grid_response 395 | 396 | 397 | 398 | def _display_grid_gpt3_log(page_size=10, grid_height=370): 399 | with st.expander("Review logs of promp/response: ", expanded=False): 400 | df_log = _select_log() 401 | _display_refresh_log() 402 | 403 | grid_response = _display_grid_df(df_log, selection_mode="single", page_size=page_size, grid_height=grid_height) 404 | if grid_response: 405 | selected_rows = grid_response['selected_rows'] 406 | if selected_rows: 407 | _display_delete_log(selected_rows[0]) 408 | _display_update_log(selected_rows[0]) 409 | 410 | 411 | def _select_note(): 412 | with DBConn(CFG["DB_FILE"]) as _conn: 413 | sql_stmt = f""" 414 | select ts,topic,url,comment,uuid 415 | from {TABLE_NOTES} 416 | order by ts desc ; 417 | """ 418 | return pd.read_sql(sql_stmt, _conn) 419 | 420 | def _update_note(data): 421 | # print(f"_update_note: \n{data}") 422 | if not data or len(data) < 3: 423 | return # id,ts populated by default 424 | 425 | with DBConn(CFG["DB_FILE"]) as _conn: 426 | set_clause = [] 427 | for col,val in data.items(): 428 | if col == "uuid": continue 429 | set_clause.append(f"{col} = '{_escape_single_quote(val)}'") 430 | update_sql = f""" 431 | update {TABLE_NOTES} 432 | set {', '.join(set_clause)} 433 | where uuid = '{data.get("uuid")}'; 434 | """ 435 | print(update_sql) 436 | cur = _conn.cursor() 437 | cur.executescript(update_sql) 438 | _conn.commit() 439 | 440 | def _delete_note(data): 441 | # print(f"_delete_note: \n{data}") 442 | uuid = data.get("uuid") 443 | if not uuid: 444 | return 445 | with DBConn(CFG["DB_FILE"]) as _conn: 446 | delete_sql = f""" 447 | delete from {TABLE_NOTES} 448 | where uuid = '{uuid}'; 449 | """ 450 | print(delete_sql) 451 | cur = _conn.cursor() 452 | cur.executescript(delete_sql) 453 | _conn.commit() 454 | 455 | def _insert_note(data): 456 | # print(f"_insert_note: \n{data}") 457 | uuid = data.get("uuid") 458 | ts = data.get("ts") 459 | topic = data.get("topic") 460 | url = data.get("url") 461 | comment = data.get("comment") 462 | if not any([topic, url, comment]): 463 | return 464 | 465 | with DBConn(CFG["DB_FILE"]) as _conn: 466 | insert_sql = f""" 467 | insert into {TABLE_NOTES} ( 468 | uuid, ts, topic, url, comment 469 | ) 470 | values ( 471 | '{uuid}', 472 | '{ts}', 473 | '{_escape_single_quote(topic)}', 474 | '{_escape_single_quote(url)}', 475 | '{_escape_single_quote(comment)}' 476 | ); 477 | """ 478 | print(insert_sql) 479 | cur = _conn.cursor() 480 | cur.executescript(insert_sql) 481 | _conn.commit() 482 | 483 | 484 | def _display_grid_notes(): 485 | df_note = _select_note() 486 | grid_response = _display_grid_df(df_note, selection_mode="single", page_size=5, grid_height=220) 487 | selected_row = None 488 | if grid_response: 489 | selected_rows = grid_response['selected_rows'] 490 | if selected_rows and len(selected_rows): 491 | selected_row = selected_rows[0] 492 | 493 | ts_old = selected_row.get("ts") if selected_row is not None else "" 494 | uuid_old = selected_row.get("uuid") if selected_row is not None else "" 495 | topic_old = selected_row.get("topic") if selected_row is not None else "" 496 | url_old = selected_row.get("url") if selected_row is not None else "" 497 | comment_old = selected_row.get("comment") if selected_row is not None else "" 498 | 499 | col_left,col_right = st.columns([5,5]) 500 | data = {"ts": str(datetime.now())} 501 | with col_left: 502 | ts = st.text_input("ts", value=ts_old, disabled=True, key="note_ts") 503 | topic = st.text_input("topic", value=topic_old, key="note_topic") 504 | if topic != topic_old: data.update({"topic" : topic}) 505 | url = st.text_input("URL", value=url_old, key="note_url") 506 | if url != url_old: data.update({"url" : url}) 507 | 508 | with col_right: 509 | id = st.text_input("uuid", value=uuid_old, disabled=True) 510 | data.update({"uuid" : id}) 511 | comment = st.text_area("comment", value=comment_old, height=125) 512 | if comment != comment_old: data.update({"comment" : comment}) 513 | 514 | c0, c1, c2, c3, _ = st.columns([1,1,1,1,6]) 515 | with c0: 516 | btn_refresh = st.button('Refresh') 517 | with c1: 518 | btn_add = st.button(" Add ", key="insert_note") 519 | with c2: 520 | btn_update = st.button("Update", key="update_note") 521 | with c3: 522 | btn_delete = st.button("Delete", key="delete_note") 523 | 524 | if btn_refresh: 525 | return 526 | 527 | if btn_add and selected_row is None: 528 | data = { 529 | "uuid": str(uuid4()), 530 | "ts": str(datetime.now()), 531 | "topic": topic, 532 | "url": url, 533 | "comment": comment, 534 | } 535 | _insert_note(data) 536 | 537 | if btn_update and selected_row is not None: 538 | _update_note(data) 539 | 540 | if btn_delete and selected_row is not None: 541 | _delete_note(data) 542 | 543 | def _execute_code_sql(code): 544 | with DBConn(CFG["DB_FILE"]) as _conn: 545 | if code.strip().lower().startswith("select"): 546 | df = pd.read_sql(code, _conn) 547 | st.dataframe(df) 548 | elif code.strip().split(" ")[0].lower() in ["create", "insert","update", "delete", "drop"]: 549 | cur = _conn.cursor() 550 | cur.executescript(code) 551 | _conn.commit() 552 | 553 | 554 | 555 | def _execute_code_python(code): 556 | # https://stackoverflow.com/questions/11914472/how-to-use-stringio-in-python3 557 | # create file-like string to capture output 558 | codeOut = StringIO() 559 | codeErr = StringIO() 560 | # capture output and errors 561 | sys.stdout = codeOut 562 | sys.stderr = codeErr 563 | # https://stackoverflow.com/questions/54840271/why-do-i-get-nameerror-name-is-not-defined-with-exec 564 | # use globals() 565 | exec(compile(code, "", 'exec'), globals(), globals()) 566 | # restore stdout and stderr 567 | sys.stdout = sys.__stdout__ 568 | sys.stderr = sys.__stderr__ 569 | if codeOut and codeOut.getvalue(): 570 | st.info(codeOut.getvalue()) 571 | if codeErr and codeErr.getvalue(): 572 | st.error(codeErr.getvalue()) 573 | 574 | def _execute_code(gen_code, use_case): 575 | try: 576 | if use_case == "sql": 577 | _execute_code_sql(gen_code) 578 | st.info("Execution successful!") 579 | elif use_case == "python": 580 | _execute_code_python(gen_code) 581 | st.info("Execution successful!") 582 | elif use_case == "javascript": 583 | st.info("You can use browser developer console to validate JavaScript code") 584 | except: 585 | st.error(f"Execution failed:\n {format_exc()}") 586 | ##################################################### 587 | # Menu Handlers 588 | ##################################################### 589 | def do_welcome(): 590 | # st.subheader(f"{_STR_MENU_HOME}") 591 | st.header("What is GPT-3?") 592 | 593 | st.markdown(f""" 594 | [GPT-3](https://www.techtarget.com/searchenterpriseai/definition/GPT-3) or the third generation Generative Pre-trained Transformer, is a huge neural network machine learning model with 175 billion parameters trained on internet data to generate any type of text. Developed by [OpenAI](https://openai.com/), it requires a small amount of input text to generate large volumes of relevant and sophisticated machine-generated text. 595 | - [Overview](https://beta.openai.com/) 596 | - [Documentation](https://beta.openai.com/docs) 597 | - [Examples](https://beta.openai.com/examples) 598 | - [Playground](https://beta.openai.com/playground?mode=complete) 599 | - [Community](https://community.openai.com/) 600 | - [Wikipedia](https://en.wikipedia.org/wiki/GPT-3) 601 | 602 | This [Streamlit App](https://streamlit.io/) helps explore [GPT-3 Codex capability](https://beta.openai.com/docs/guides/code/introduction) for code generation ([src]({SRC_URL})) 603 | - Experiment with GPT-3 capability via API call locally; 604 | - Validate generated SQL against a [SQLite sample dataset](https://www.sqlitetutorial.net/sqlite-sample-database/); 605 | - Validate generated Python because Streamlit is built on python; 606 | - Validate generated JavaScript in native brower developer console; 607 | 608 | Instead of `code-davinci-002` model, however, one can perform many tasks by choosing `text-davinci-003` or other models. 609 | 610 | #### [OpenAI models](https://beta.openai.com/docs/models/overview) 611 | 612 | """, unsafe_allow_html=True) 613 | file_name = "../docs/openai_models.csv" 614 | if exists(file_name): 615 | df = pd.read_csv(file_name, header=0, sep='|') 616 | st.table(df) 617 | 618 | file_name = "../docs/gen_code.png" 619 | if exists(file_name): 620 | st.subheader("Generate Code") 621 | st.image(file_name) 622 | 623 | file_name = "../docs/edit-fix-error.png" 624 | if exists(file_name): 625 | st.subheader("Fix Code") 626 | st.image(file_name) 627 | 628 | def do_code_gen_run(): 629 | st.subheader(f"{_STR_MENU_SQL_GEN}") 630 | with st.expander("OpenAI playground: ", expanded=True): 631 | do_code_gen(show_response=False, show_header=False) 632 | 633 | st.subheader(f"{_STR_MENU_SQL_RUN}") 634 | do_code_run(show_header=False) 635 | 636 | def do_code_gen(show_response=True, show_header=True): 637 | if show_header: 638 | st.subheader(f"{_STR_MENU_SQL_GEN}") 639 | 640 | OPENAI_API_KEY = KEY.get("OPENAI_API_KEY", {}) 641 | if not OPENAI_API_KEY: 642 | st.error("OPENAI_API_KEY is missing, signup with GPT-3 at https://beta.openai.com/ and add your API_KEY to settings") 643 | return 644 | 645 | openai.api_key = OPENAI_API_KEY 646 | openai_mode = st.session_state.get("openai_mode") if "openai_mode" in st.session_state else CFG["Mode"][0] 647 | if openai_mode != "Complete": 648 | st.error(f"OpenAI mode {openai_mode} not yet implemented") 649 | return 650 | 651 | c1,c2,_,c3 = st.columns([2,2,1,6]) 652 | with c1: 653 | if "openai_model_2" in st.session_state: 654 | selected_model = st.session_state.get("openai_model_2") 655 | elif "openai_model" in st.session_state: 656 | selected_model = st.session_state.get("openai_model") 657 | else: 658 | selected_model = CFG["Model"][0] 659 | openai_model = st.selectbox("Model", CFG["Model"], index=CFG["Model"].index(selected_model), key="openai_model_2") 660 | with c2: 661 | if "openai_use_case_2" in st.session_state: 662 | selected_use_case = st.session_state.get("openai_use_case_2") 663 | elif "openai_use_case" in st.session_state: 664 | selected_use_case = st.session_state.get("openai_use_case") 665 | else: 666 | selected_use_case = CFG["Use_case"][0] 667 | openai_use_case = st.selectbox("Use case", CFG["Use_case"], index=CFG["Use_case"].index(selected_use_case), key="openai_use_case_2") 668 | with c3: 669 | st.info("""For non-code-generation use cases, 670 | choose text-davinci-002 model.""") 671 | 672 | c_1, c_2, _, _, _, _ = st.columns(6) 673 | with c_1: 674 | insert_prompts = st.checkbox(f"insert delimitor {PROMPT_DELIMITOR}", value=True) 675 | with c_2: 676 | remove_leading_hash = st.checkbox(f"remove leading #", value=False) 677 | 678 | prompt_value = EXAMPLE_PROMPT.get(openai_use_case, "") 679 | if insert_prompts: 680 | prompt_value = f"{PROMPT_DELIMITOR}\n" + prompt_value + f"\n{PROMPT_DELIMITOR}\n\n\n" 681 | prompt = st.text_area(f"Prompt: (example delimitors: {str(PROMPT_LIST)}", value=prompt_value, height=200) 682 | prompt_s = '\n'.join([i.strip() for i in prompt.split('\n') if i.strip()]) 683 | # st.write(prompt) 684 | if st.button("Submit"): 685 | print(f"model = {openai_model}, use case = {openai_use_case}") 686 | openai_temperature = st.session_state.get("openai_temperature", 0) 687 | openai_maximum_length = st.session_state.get("openai_maximum_length", 256) 688 | openai_top_p = st.session_state.get("openai_top_p", 1.0) 689 | openai_frequency_penalty = st.session_state.get("openai_frequency_penalty", 0) 690 | openai_presence_penalty = st.session_state.get("openai_presence_penalty", 0) 691 | settings_dict = { 692 | "Mode": openai_mode, 693 | "Model": openai_model, 694 | "Use_case": openai_use_case, 695 | "Temperature": openai_temperature, 696 | "Maximum_length": openai_maximum_length, 697 | "Top_p": openai_top_p, 698 | "Frequency_penalty": openai_frequency_penalty, 699 | "Presence_penalty": openai_presence_penalty, 700 | } 701 | 702 | if insert_prompts and PROMPT_DELIMITOR not in prompt_s: 703 | prompt_str = f"{PROMPT_DELIMITOR}\n" + prompt_s + f"\n{PROMPT_DELIMITOR}\n\n\n" 704 | else: 705 | prompt_str = prompt_s 706 | 707 | if remove_leading_hash: 708 | prompt_str = _remove_leading_hash(prompt_str) 709 | 710 | # st.info(settings_dict) 711 | 712 | try: 713 | response = openai.Completion.create( 714 | model=openai_model, 715 | prompt=prompt_str, 716 | temperature=openai_temperature, 717 | max_tokens=openai_maximum_length, 718 | top_p=openai_top_p, 719 | frequency_penalty=openai_frequency_penalty, 720 | presence_penalty=openai_presence_penalty 721 | ) 722 | resp_str = response["choices"][0]["text"] 723 | st.session_state["GENERATED_CODE"] = resp_str 724 | _insert_log(use_case=openai_use_case, settings=str(settings_dict), prompt=prompt_str, output=resp_str) 725 | if show_response: 726 | st.write("Response:") 727 | st.info(resp_str) 728 | except: 729 | st.error(format_exc()) 730 | 731 | def do_code_run(show_header=True): 732 | if show_header: 733 | st.subheader(f"{_STR_MENU_SQL_RUN}") 734 | 735 | _display_grid_gpt3_log() 736 | 737 | selected_row = st.session_state.get("LOG_SELECTED_ROW", None) 738 | gen_code = st.session_state.get("GENERATED_CODE", None) 739 | if selected_row is None and gen_code is None: 740 | return 741 | 742 | if selected_row is not None: 743 | use_case = selected_row.get("use_case") 744 | output = selected_row.get("output") 745 | else: 746 | use_case = "SQL" 747 | output = None 748 | gen_code_val = gen_code or output 749 | gen_code = st.text_area("Generated Code:", value=gen_code_val, height=200) 750 | 751 | exec_engines = ["sql", "python", "javascript"] 752 | if use_case.lower() not in exec_engines: 753 | # st.warning(f"Executing {use_case} is not implemented, please use language-specific interpreter to validate the code.") 754 | return 755 | 756 | selected_use_case = st.selectbox("Run Code (Note: SQL/Python/JavaScript can be validated here, although error may occur due to un-met dependency.)", exec_engines, index=exec_engines.index(use_case.lower())) 757 | btn_label = { 758 | "sql" : "Run SQL ...", 759 | "python" : "Run Python ...", 760 | "javascript" : "Run JavaScript ...", 761 | } 762 | if gen_code and st.button(btn_label[selected_use_case]): 763 | _execute_code(gen_code, selected_use_case) 764 | 765 | def do_sqlite_sample_db(): 766 | st.subheader(f"{_STR_MENU_SQLITE_SAMPLE}") 767 | tables = _get_tables() 768 | idx_default = tables.index("customers") if "customers" in tables else 0 769 | schema_value = st.session_state.get("TABLE_SCHEMA", "") 770 | c1, _, c2 = st.columns([5,1,8]) 771 | with c1: 772 | table_name = st.selectbox("Table:", tables, index=idx_default, key="table_name") 773 | if st.button("Show schema"): 774 | with DBConn(CFG["DB_FILE"]) as _conn: 775 | df_schema = pd.read_sql(f"select sql from sqlite_schema where name = '{table_name}'; ", _conn) 776 | schema_value = df_schema["sql"].to_list()[0] 777 | st.session_state.update({"TABLE_SCHEMA" : schema_value}) 778 | 779 | with c2: 780 | st.text_area("Schema:", value=schema_value, height=150) 781 | 782 | sql_stmt = st.text_area("SQL:", value=f"select * from {table_name} limit 10;", height=100) 783 | if st.button("Execute Query ..."): 784 | try: 785 | _execute_code_sql(code=sql_stmt) 786 | except: 787 | st.error(format_exc()) 788 | 789 | 790 | def do_settings(): 791 | st.subheader(f"{_STR_MENU_SETTINGS}") 792 | 793 | # with st.form(key="settings"): 794 | OPENAI_MODES = CFG["Mode"] # ["Complete", "Insert", "Edit"] 795 | OPENAI_MODELS = CFG["Model"] # ["davinci-instruct-beta", "text-davinci-002", "text-davinci-001"] 796 | OPENAI_USE_CASES = CFG["Use_case"] 797 | st.selectbox("Mode", options=OPENAI_MODES, key="openai_mode") 798 | st.selectbox("Model", options=OPENAI_MODELS, key="openai_model") 799 | st.selectbox("Use case", options=OPENAI_USE_CASES, key="openai_use_case") 800 | st.slider("Temperature", min_value=0.0, max_value=1.0, step=0.01, value=CFG["Temperature"], key="openai_temperature") 801 | st.slider("Maximum length", min_value=1, max_value=2048, step=1, value=CFG["Maximum_length"], key="openai_maximum_length") 802 | st.slider("Top_p", min_value=0.0, max_value=1.0, step=0.01, value=CFG["Top_p"], key="openai_top_p") 803 | st.slider("Frequency_penalty", min_value=0.0, max_value=1.0, step=0.01, value=CFG["Frequency_penalty"], key="openai_frequency_penalty") 804 | st.slider("Presence_penalty", min_value=0.0, max_value=1.0, step=0.01, value=CFG["Presence_penalty"], key="openai_presence_penalty") 805 | st.text_input("Input prefix", value=CFG["Input_prefix"], key="openai_input_prefix") 806 | st.text_input("Input suffix", value=CFG["Input_suffix"], placeholder="1 newline char", key="openai_input_suffix") 807 | st.text_input("Output prefix", value=CFG["Output_prefix"], key="openai_output_prefix") 808 | st.text_input("Output suffix", value=CFG["Output_suffix"], placeholder="2 newline chars", key="openai_output_suffix") 809 | st.text_input("SQLite DB File", value=CFG["DB_FILE"], key="sqlite_db_file") 810 | st.text_input("API Key File", value=CFG["API_KEY_FILE"], key="api_key_file") 811 | st.text_input("OpenAI API Key", value=KEY.get("OPENAI_API_KEY", ""), key="openai_api_key") 812 | # st.form_submit_button('Save settings', on_click=_save_settings) 813 | if st.button('Save settings'): 814 | _save_settings() 815 | 816 | with st.expander("Review settings Yaml files:", expanded=False): 817 | st.write(f"File: cfg/settings.yaml") 818 | st.write(CFG) 819 | st.write(f"File: {CFG['API_KEY_FILE']}") 820 | st.write(KEY) 821 | 822 | 823 | def do_notes(): 824 | st.subheader(f"{_STR_MENU_NOTES}") 825 | _display_grid_notes() 826 | 827 | 828 | 829 | ##################################################### 830 | # setup menu_items 831 | ##################################################### 832 | menu_dict = { 833 | _STR_MENU_HOME : {"fn": do_welcome}, 834 | _STR_MENU_SQL_GEN_RUN: {"fn": do_code_gen_run}, 835 | # _STR_MENU_SQL_GEN: {"fn": do_code_gen}, 836 | # _STR_MENU_SQL_RUN: {"fn": do_code_run}, 837 | _STR_MENU_SQLITE_SAMPLE: {"fn": do_sqlite_sample_db}, 838 | _STR_MENU_SETTINGS: {"fn": do_settings}, 839 | _STR_MENU_NOTES: {"fn": do_notes}, 840 | } 841 | 842 | ## sidebar Menu 843 | def do_sidebar(): 844 | menu_options = list(menu_dict.keys()) 845 | default_ix = menu_options.index(_STR_MENU_HOME) 846 | 847 | with st.sidebar: 848 | st.markdown(f"

{_STR_APP_NAME}

",unsafe_allow_html=True) 849 | 850 | menu_item = st.selectbox("Menu:", menu_options, index=default_ix, key="menu_item") 851 | # keep menu item in the same order as i18n strings 852 | 853 | if menu_item in menu_options: 854 | pass 855 | 856 | # body 857 | def do_body(): 858 | menu_item = st.session_state.get("menu_item", _STR_MENU_HOME) 859 | menu_dict[menu_item]["fn"]() 860 | 861 | def main(): 862 | _load_settings() 863 | # st.write(CFG) 864 | do_sidebar() 865 | do_body() 866 | 867 | if __name__ == '__main__': 868 | main() 869 | -------------------------------------------------------------------------------- /app/cfg/settings.yaml: -------------------------------------------------------------------------------- 1 | API_KEY_FILE: cfg/api_key.yaml 2 | DB_FILE: db/gpt3sql.sqlite 3 | Frequency_penalty: 0.0 4 | Input_prefix: 'input: ' 5 | Input_suffix: ' 6 | 7 | ' 8 | Maximum_length: 2024 9 | Mode: 10 | - Complete 11 | Model: 12 | - text-davinci-002 13 | - text-davinci-001 14 | - text-curie-001 15 | - text-babbage-001 16 | - text-ada-001 17 | Output_prefix: 'output: ' 18 | Output_suffix: ' 19 | 20 | 21 | ' 22 | Presence_penalty: 0.0 23 | Temperature: 0.49 24 | Top_p: 1.0 25 | Use_case: 26 | - Python 27 | - SQL 28 | - AWS 29 | - Explain 30 | - Ask_Anything 31 | - Math 32 | - Science 33 | - Others 34 | - JavaScript 35 | - Go 36 | - Perl 37 | - PHP 38 | - R 39 | - Ruby 40 | - Swift 41 | - TypeScript 42 | - Shell 43 | -------------------------------------------------------------------------------- /app/db/gpt3sql.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wgong/gpt3sql/d2db63b13e707b7680098ded15efb35d9b058c85/app/db/gpt3sql.sqlite -------------------------------------------------------------------------------- /app/db/merge_db.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "Merge `GPT-3 log` data between 2 sqlite databases" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 1, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import sqlite3\n", 17 | "import pandas as pd\n", 18 | "\n", 19 | "DELIMITOR = \",\"\n", 20 | "\n", 21 | "class DBConn(object):\n", 22 | " def __init__(self, db_file):\n", 23 | " self.conn = sqlite3.connect(db_file)\n", 24 | " def __enter__(self):\n", 25 | " return self.conn\n", 26 | " def __exit__(self, type, value, traceback):\n", 27 | " self.conn.close()\n", 28 | "\n", 29 | "def get_data(db_file, table_name):\n", 30 | " with DBConn(db_file) as _conn:\n", 31 | " sql_stmt = f'''\n", 32 | " SELECT uuid||'{DELIMITOR}'||ts as uuid_ts from {table_name};\n", 33 | " '''\n", 34 | " return pd.read_sql(sql_stmt, _conn)\n", 35 | "\n", 36 | "def list2sql_str(l):\n", 37 | " \"\"\"convert a list into SQL in string\n", 38 | " \"\"\"\n", 39 | " return str(l).replace(\"[\", \"(\").replace(\"]\", \")\")\n", 40 | "\n", 41 | "def merge_logs(src_db, tgt_db=\"gpt3sql.sqlite\", table_name=\"t_gpt3_log\"):\n", 42 | " \n", 43 | " df_src = get_data(src_db, table_name)\n", 44 | " df_tgt = get_data(tgt_db, table_name)\n", 45 | "\n", 46 | " uuid_ts_src = df_src[\"uuid_ts\"].to_list()\n", 47 | " uuid_ts_tgt = df_tgt[\"uuid_ts\"].to_list()\n", 48 | "\n", 49 | " uuid_update = [i.split(DELIMITOR)[0] for i in (set(uuid_ts_src) - set(uuid_ts_tgt))]\n", 50 | "\n", 51 | " if uuid_update:\n", 52 | "\n", 53 | " # fetch new/updated row from src\n", 54 | " with DBConn(src_db) as _conn:\n", 55 | " sql_stmt = f'''\n", 56 | " SELECT * from {table_name} where uuid in {list2sql_str(uuid_update)} ;\n", 57 | " '''\n", 58 | " df_src = pd.read_sql(sql_stmt, _conn)\n", 59 | "\n", 60 | " with DBConn(tgt_db) as _conn:\n", 61 | " # remove old rows in tgt\n", 62 | " delete_sql = f\"\"\"\n", 63 | " delete from {table_name} where uuid in {list2sql_str(uuid_update)} ;\n", 64 | " \"\"\"\n", 65 | " cur = _conn.cursor()\n", 66 | " cur.executescript(delete_sql)\n", 67 | " _conn.commit()\n", 68 | "\n", 69 | " # append to tgt\n", 70 | " df_src.to_sql(table_name, _conn, if_exists='append', index=False)\n", 71 | " _conn.commit() \n", 72 | "\n", 73 | " return uuid_update" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 2, 79 | "metadata": {}, 80 | "outputs": [ 81 | { 82 | "name": "stdout", 83 | "output_type": "stream", 84 | "text": [ 85 | "['aab944b6-1b07-4738-92b8-230564ad49c0', '78d7f638-0b42-415a-a5b8-b093135a6034', '600d4d62-b165-43bd-85e0-dc962f50c3cd', 'b2e174f1-82a6-470b-ad4e-1967e142687f', 'fc2fed9c-af94-4098-b011-c5995fd5fb41', '90461d42-44bb-4028-9eef-123b34c84e61', '757a57d4-e7b0-4159-a6a0-abd961511498']\n" 86 | ] 87 | } 88 | ], 89 | "source": [ 90 | "src_db = \"gpt3sql-20221123.sqlite\"\n", 91 | "uuid_update = merge_logs(src_db)\n", 92 | "print(uuid_update)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [] 101 | } 102 | ], 103 | "metadata": { 104 | "kernelspec": { 105 | "display_name": "Python 3", 106 | "language": "python", 107 | "name": "python3" 108 | }, 109 | "language_info": { 110 | "codemirror_mode": { 111 | "name": "ipython", 112 | "version": 3 113 | }, 114 | "file_extension": ".py", 115 | "mimetype": "text/x-python", 116 | "name": "python", 117 | "nbconvert_exporter": "python", 118 | "pygments_lexer": "ipython3", 119 | "version": "3.8.5" 120 | }, 121 | "toc": { 122 | "base_numbering": 1, 123 | "nav_menu": {}, 124 | "number_sections": true, 125 | "sideBar": true, 126 | "skip_h1_title": false, 127 | "title_cell": "Table of Contents", 128 | "title_sidebar": "Contents", 129 | "toc_cell": false, 130 | "toc_position": {}, 131 | "toc_section_display": true, 132 | "toc_window_display": false 133 | } 134 | }, 135 | "nbformat": 4, 136 | "nbformat_minor": 4 137 | } 138 | -------------------------------------------------------------------------------- /app/db/merge_db.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | 4 | # Merge `GPT-3 log` data between 2 sqlite databases 5 | 6 | import sys 7 | import sqlite3 8 | import pandas as pd 9 | 10 | DELIMITOR = "," 11 | 12 | class DBConn(object): 13 | def __init__(self, db_file): 14 | self.conn = sqlite3.connect(db_file) 15 | def __enter__(self): 16 | return self.conn 17 | def __exit__(self, type, value, traceback): 18 | self.conn.close() 19 | 20 | def get_data(db_file, table_name): 21 | with DBConn(db_file) as _conn: 22 | sql_stmt = f''' 23 | SELECT uuid||'{DELIMITOR}'||ts as uuid_ts from {table_name}; 24 | ''' 25 | return pd.read_sql(sql_stmt, _conn) 26 | 27 | def list2sql_str(l): 28 | """convert a list into SQL in string 29 | """ 30 | return str(l).replace("[", "(").replace("]", ")") 31 | 32 | def merge_logs(src_db, tgt_db="gpt3sql.sqlite", table_name="t_gpt3_log"): 33 | 34 | df_src = get_data(src_db, table_name) 35 | df_tgt = get_data(tgt_db, table_name) 36 | 37 | uuid_ts_src = df_src["uuid_ts"].to_list() 38 | uuid_ts_tgt = df_tgt["uuid_ts"].to_list() 39 | 40 | uuid_update = [i.split(DELIMITOR)[0] for i in (set(uuid_ts_src) - set(uuid_ts_tgt))] 41 | 42 | if uuid_update: 43 | 44 | # fetch new/updated row from src 45 | with DBConn(src_db) as _conn: 46 | sql_stmt = f''' 47 | SELECT * from {table_name} where uuid in {list2sql_str(uuid_update)} ; 48 | ''' 49 | df_src = pd.read_sql(sql_stmt, _conn) 50 | 51 | with DBConn(tgt_db) as _conn: 52 | # remove old rows in tgt 53 | delete_sql = f""" 54 | delete from {table_name} where uuid in {list2sql_str(uuid_update)} ; 55 | """ 56 | cur = _conn.cursor() 57 | cur.executescript(delete_sql) 58 | _conn.commit() 59 | 60 | # append to tgt 61 | df_src.to_sql(table_name, _conn, if_exists='append', index=False) 62 | _conn.commit() 63 | 64 | return uuid_update 65 | 66 | 67 | if __name__ == "__main__": 68 | if len(sys.argv) > 1: 69 | src_db = sys.argv[1] 70 | uuid_update = merge_logs(src_db) 71 | if uuid_update: 72 | print(f"Merged the following records from '{src_db}' DB:\n\t{uuid_update}") 73 | else: 74 | print("Nothing to merge") 75 | else: 76 | print("[Error] source DB file missing!") 77 | 78 | 79 | 80 | 81 | -------------------------------------------------------------------------------- /app/db/sqlite.sql: -------------------------------------------------------------------------------- 1 | select * from t_gpt3_log limit 10; 2 | -- where input is not null and input != ''; 3 | 4 | 5 | create table t_resource ( 6 | uuid TEXT NOT NULL, 7 | ts text, 8 | topic text, 9 | url text, 10 | comment text 11 | ); 12 | create unique index idx_resource on t_resource(uuid); 13 | select ts,topic,url,comment,uuid from T_RESOURCE order by ts desc; 14 | insert into t_resource (uuid, ts, topic, url, comment) 15 | values 16 | ('400f2586-2d36-4bc7-b762-b73971d8a267', '2022-10-17 16:15:15.734865', 'openai-gpt3', 'https://beta.openai.com/docs/guides/code/introduction', ''), 17 | ('4c2d84b5-8594-4d15-a3ea-8259e03436de', '2022-10-19 21:04:04.485476', 'python', 'https://sparkbyexamples.com/pyspark-tutorial/', 'good resource for pyspark') 18 | ; 19 | 20 | 21 | update t_gpt3_log set use_case='SQL'; 22 | 23 | create table t_gpt3_log ( 24 | uuid TEXT NOT NULL, 25 | ts text, 26 | use_case text, 27 | settings text, 28 | prompt text, 29 | input text, 30 | output text, 31 | comment text 32 | ); 33 | 34 | alter table t_gpt3_log add column valid_output text; 35 | alter table t_gpt3_log drop column input; 36 | 37 | create unique index idx_gpt3_log on t_gpt3_log(uuid); 38 | 39 | 40 | 41 | 42 | SELECT 43 | name 44 | FROM 45 | sqlite_schema 46 | WHERE 47 | type ='table' AND 48 | name NOT LIKE 'sqlite_%'; -------------------------------------------------------------------------------- /docs/0-about.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wgong/gpt3sql/d2db63b13e707b7680098ded15efb35d9b058c85/docs/0-about.PNG -------------------------------------------------------------------------------- /docs/1-gen-sql.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wgong/gpt3sql/d2db63b13e707b7680098ded15efb35d9b058c85/docs/1-gen-sql.png -------------------------------------------------------------------------------- /docs/2-run-sql.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wgong/gpt3sql/d2db63b13e707b7680098ded15efb35d9b058c85/docs/2-run-sql.PNG -------------------------------------------------------------------------------- /docs/3-sample-data.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wgong/gpt3sql/d2db63b13e707b7680098ded15efb35d9b058c85/docs/3-sample-data.PNG -------------------------------------------------------------------------------- /docs/4-config.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wgong/gpt3sql/d2db63b13e707b7680098ded15efb35d9b058c85/docs/4-config.png -------------------------------------------------------------------------------- /docs/edit-fix-error.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wgong/gpt3sql/d2db63b13e707b7680098ded15efb35d9b058c85/docs/edit-fix-error.png -------------------------------------------------------------------------------- /docs/gen_code.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/wgong/gpt3sql/d2db63b13e707b7680098ded15efb35d9b058c85/docs/gen_code.png -------------------------------------------------------------------------------- /docs/openai_models.csv: -------------------------------------------------------------------------------- 1 | Model Name| Description| Strength 2 | "text-davinci-003"| "It produces higher quality writing. This will help your applications deliver clearer, more engaging, and more compelling content; It can handle more complex instructions, meaning you can get even more creative with how you make use of its capabilities now; It’s better at longer form content generation, allowing you to take on tasks that would have previously been too difficult to achieve. " | "" 3 | "text-davinci-002"| "Most capable model in the GPT-3 series. Can perform any task the other models can, often with less context. It can process up to 4000 tokens per request."| "complex intent, cause and effect, creative generation, search, summarization for audience." 4 | "text-davinci-001"| "Older version of text-davinci-002 model"| "complex intent, cause and effect, creative generation, search, summarization for audience." 5 | "text-curie-001"| "Very capable, but faster and lower cost than text-davinci-002 model", "Language translation, complext classification, sentiment, summarization." 6 | "text-babbage-001"| "capable of straightforward tasks, very faster and lower cost"| "Moderate classification, semantic search." 7 | "text-ada-001"| "capable of simple tasks, usually the fastest model in the GPT-3 series, and lowest cost"| "Parsing text, simple classification, address correction, keywords." 8 | "davinci-instruct-beta"| "This is an older model, use text-davinci-002 instead"| "shorter and more naturally phrased prompts, complex intent, cause and effect." 9 | "code-davinci-002"| "Most capable model in the Codex series, which can understand and generate code, including translating natural language to code. It can process up to 4000 tokens per request" | "OpenAI JavaScript Sandbox demo application uses this model to translate instructions into JavaScript" 10 | "code-cushman-001"| "Almost as capable as code-davinci-002, but slightly faster."| "Real-time applications where low latency is preferable" 11 | "code-davinci-001"| "older version of Codex"| "" 12 | -------------------------------------------------------------------------------- /examples/codex-quickstart.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "## Code Completion\n", 8 | "\n", 9 | "- [Quick Start](https://beta.openai.com/docs/guides/code/quickstart)\n", 10 | "- [More Examples](https://beta.openai.com/examples?category=code)\n", 11 | "\n", 12 | "Request to be added on Codex beta waitlist,\n", 13 | "\n", 14 | "The following examples use `text-davinci-002` model, which is less optimal than `codex`" 15 | ] 16 | }, 17 | { 18 | "cell_type": "markdown", 19 | "metadata": {}, 20 | "source": [ 21 | "### Saying \"Hello\" (Python)" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": 5, 27 | "metadata": {}, 28 | "outputs": [ 29 | { 30 | "name": "stdout", 31 | "output_type": "stream", 32 | "text": [ 33 | "What is your name?\n", 34 | "John\n", 35 | "Hello, John\n" 36 | ] 37 | } 38 | ], 39 | "source": [ 40 | "\"\"\"\n", 41 | "Ask the user for their name and say \"Hello\"\n", 42 | "\"\"\"\n", 43 | "print(\"What is your name?\")\n", 44 | "\n", 45 | "name = input()\n", 46 | "\n", 47 | "print(\"Hello, \" + name)" 48 | ] 49 | }, 50 | { 51 | "cell_type": "markdown", 52 | "metadata": {}, 53 | "source": [ 54 | "### Create random names (Python)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "raw", 59 | "metadata": {}, 60 | "source": [ 61 | "\"\"\"\n", 62 | "Generate random names in python:\n", 63 | "1. Create a list of first names\n", 64 | "2. Create a list of last names\n", 65 | "3. Combine them randomly into a list of 100 full names \n", 66 | "\"\"\"\n", 67 | "from random import randint # added my me\n", 68 | "\n", 69 | "names = [\"John\", \"Mary\", \"Jane\", \"Bob\"]\n", 70 | "lastnames = [\"Smith\", \"Jones\", \"Williams\"]\n", 71 | "\n", 72 | "fullnames = []\n", 73 | "\n", 74 | "for i in range(100):\n", 75 | "\n", 76 | " fullnames.append(names[randint(0, len(names)-1)] + lastnames[randint(0, len(lastnames)-1)])\n", 77 | "\n", 78 | "print(fullnames)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 10, 84 | "metadata": {}, 85 | "outputs": [ 86 | { 87 | "name": "stdout", 88 | "output_type": "stream", 89 | "text": [ 90 | "0\n", 91 | "1\n", 92 | "2\n", 93 | "3\n", 94 | "4\n", 95 | "5\n", 96 | "6\n", 97 | "7\n", 98 | "8\n", 99 | "9\n", 100 | "10\n" 101 | ] 102 | } 103 | ], 104 | "source": [ 105 | "# Python 3\n", 106 | "# Create a function to count to 10\n", 107 | "\n", 108 | "def counter():\n", 109 | " for i in range(11):\n", 110 | " print(i)\n", 111 | "\n", 112 | "counter()" 113 | ] 114 | }, 115 | { 116 | "cell_type": "raw", 117 | "metadata": {}, 118 | "source": [ 119 | "# Python 3\n", 120 | "# Calculate the mean distance between an array of points and their cluster centroid\n", 121 | "def get_means(points, clusters, labels):\n", 122 | " means = []\n", 123 | " for cluster in clusters:\n", 124 | " if len(cluster) == 0:\n", 125 | " means.append(0)\n", 126 | " else:\n", 127 | " mean = np.mean(cluster, axis=0)\n", 128 | " means.append(mean)\n", 129 | " return means\n", 130 | "\n", 131 | "# Calculate the average distance of the points from their cluster centroid\n", 132 | "def calculate_means(points, clusters, labels):\n", 133 | " means = get_means(points, clusters, labels)\n", 134 | " \n", 135 | " # Get the mean of the distances of each point to its cluster centroid\n", 136 | " mean_distances = []\n", 137 | " for i in range(0, len(points)):\n", 138 | " mean_distance = get_distance(points[i], means[labels[i]])\n", 139 | " mean_distances.append(mean_distance)\n", 140 | " return np.mean(mean_distances)" 141 | ] 142 | }, 143 | { 144 | "cell_type": "markdown", 145 | "metadata": {}, 146 | "source": [ 147 | "### Creat a query" 148 | ] 149 | }, 150 | { 151 | "cell_type": "raw", 152 | "metadata": {}, 153 | "source": [ 154 | "# Table albums, columns = [AlbumId, Title, ArtistId]\n", 155 | "# Table artists, columns = [ArtistId, Name]\n", 156 | "# Table media_types, columns = [MediaTypeId, Name]\n", 157 | "# Table playlists, columns = [PlaylistId, Name]\n", 158 | "# Table playlist_track, columns = [PlaylistId, TrackId]\n", 159 | "# Table tracks, columns = [TrackId, Name, AlbumId, MediaTypeId, GenreId, Composer, Milliseconds, Bytes, UnitPrice]\n", 160 | "\n", 161 | "# Create a query for all albums by Adele\n", 162 | "\n", 163 | "Output:\n", 164 | "\n", 165 | "SELECT AlbumId, Title, ArtistId\n", 166 | "FROM Albums\n", 167 | "WHERE ArtistId = 'Adele'" 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "metadata": {}, 173 | "source": [ 174 | "### Create a MySQL query (Python)" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 7, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "\"\"\"\n", 184 | "Table customers, columns = [CustomerId, FirstName, LastName, Company, Address, City, State, Country, PostalCode, Phone, Fax, Email, SupportRepId]\n", 185 | "Create a MySQL query for all customers in Texas named Jane\n", 186 | "\"\"\"\n", 187 | "query = \"\"\"\n", 188 | "SELECT * FROM customers\n", 189 | "WHERE State='TX'\n", 190 | "AND FirstName='Jane'\n", 191 | "\"\"\"" 192 | ] 193 | }, 194 | { 195 | "cell_type": "markdown", 196 | "metadata": {}, 197 | "source": [ 198 | "The following is a Python script that will create a MySQL table called \"customers\" with the following columns: CustomerId, FirstName, LastName, Company, Address, City, State, Country, PostalCode, Phone, Fax, Email, SupportRepId." 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": 11, 204 | "metadata": {}, 205 | "outputs": [ 206 | { 207 | "ename": "ModuleNotFoundError", 208 | "evalue": "No module named 'MySQLdb'", 209 | "output_type": "error", 210 | "traceback": [ 211 | "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", 212 | "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)", 213 | "\u001b[1;32m\u001b[0m in \u001b[0;36m\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[1;32mimport\u001b[0m \u001b[0mMySQLdb\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2\u001b[0m \u001b[0mconn\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mMySQLdb\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconnect\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"localhost\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m\"root\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m\"\"\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;34m\"customers\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 3\u001b[0m \u001b[0mcursor\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mconn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcursor\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m cursor.execute(\"\"\"\n\u001b[0;32m 5\u001b[0m \u001b[0mCREATE\u001b[0m \u001b[0mTABLE\u001b[0m \u001b[0mcustomers\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", 214 | "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'MySQLdb'" 215 | ] 216 | } 217 | ], 218 | "source": [ 219 | "import MySQLdb\n", 220 | "conn = MySQLdb.connect(\"localhost\",\"root\",\"\",\"customers\")\n", 221 | "cursor = conn.cursor()\n", 222 | "cursor.execute(\"\"\"\n", 223 | "CREATE TABLE customers\n", 224 | "(\n", 225 | "CustomerId INT UNSIGNED AUTO_INCREMENT PRIMARY KEY,\n", 226 | "FirstName VARCHAR(255),\n", 227 | "LastName VARCHAR(255),\n", 228 | "Company VARCHAR(255),\n", 229 | "Address VARCHAR(255),\n", 230 | "City VARCHAR(255),\n", 231 | "State VARCHAR(255),\n", 232 | "Country VARCHAR(255),\n", 233 | "PostalCode VARCHAR(255),\n", 234 | "Phone VARCHAR(255),\n", 235 | "Fax VARCHAR(255),\n", 236 | "Email VARCHAR(255),\n", 237 | "SupportRepId INT\n", 238 | ")\n", 239 | "\"\"\")\n", 240 | "conn.commit()\n", 241 | "conn.close()" 242 | ] 243 | }, 244 | { 245 | "cell_type": "markdown", 246 | "metadata": {}, 247 | "source": [ 248 | "### Explaining code (JavaScript)" 249 | ] 250 | }, 251 | { 252 | "cell_type": "raw", 253 | "metadata": {}, 254 | "source": [ 255 | "// Function 1\n", 256 | "var fullNames = [];\n", 257 | "for (var i = 0; i < 50; i++) {\n", 258 | " fullNames.push(names[Math.floor(Math.random() * names.length)]\n", 259 | " + \" \" + lastNames[Math.floor(Math.random() * lastNames.length)]);\n", 260 | "}\n", 261 | "\n", 262 | "// What does Function 1 do?\n", 263 | "\n", 264 | "Output:\n", 265 | "\n", 266 | "// Function 1 creates a list of 50 first and last names" 267 | ] 268 | }, 269 | { 270 | "cell_type": "markdown", 271 | "metadata": {}, 272 | "source": [ 273 | "### Create sample data" 274 | ] 275 | }, 276 | { 277 | "cell_type": "raw", 278 | "metadata": {}, 279 | "source": [ 280 | "/* Create an array of weather temperatures for San Francisco */\n", 281 | "\n", 282 | "var weather = [\n", 283 | " {\n", 284 | " \"day\": \"Jan 1\",\n", 285 | " \"temperature\": 60\n", 286 | " },\n", 287 | " {\n", 288 | " \"day\": \"Jan 2\",\n", 289 | " \"temperature\": 58\n", 290 | " },\n", 291 | " {\n", 292 | " \"day\": \"Jan 3\",\n", 293 | " \"temperature\": 57\n", 294 | " },\n", 295 | " {\n", 296 | " \"day\": \"Jan 4\",\n", 297 | " \"temperature\": 60\n", 298 | " },\n", 299 | " {\n", 300 | " \"day\": \"Jan 5\",\n", 301 | " \"temperature\": 62\n", 302 | " },\n", 303 | " {\n", 304 | " \"day\": \"Jan 6\",\n", 305 | " \"temperature\": 65\n", 306 | " },\n", 307 | " {\n", 308 | " \"day\": \"Jan 7\",\n", 309 | " \"temperature\": 65\n", 310 | " },\n", 311 | " {\n", 312 | " \"day\": \"Jan 8\",\n", 313 | " \"temperature\": 64\n", 314 | " },\n", 315 | " {\n", 316 | " \"day\": \"Jan 9\",\n", 317 | " \"temperature\": 64\n", 318 | " },\n", 319 | " {\n", 320 | " \"day\": \"Jan 10\",\n", 321 | " \"temperature\": 63\n", 322 | " },\n", 323 | " {\n", 324 | " \"day\": \"Jan 11\",\n", 325 | " \"temperature\": 60\n", 326 | " }\n", 327 | "]" 328 | ] 329 | }, 330 | { 331 | "cell_type": "markdown", 332 | "metadata": {}, 333 | "source": [ 334 | "### Create and Explain an SQL query" 335 | ] 336 | }, 337 | { 338 | "cell_type": "markdown", 339 | "metadata": {}, 340 | "source": [ 341 | "#### Create query" 342 | ] 343 | }, 344 | { 345 | "cell_type": "raw", 346 | "metadata": {}, 347 | "source": [ 348 | "# Table department, columns = [Id, Name]\n", 349 | "# Table employee, columns = [Id, department_id]\n", 350 | "# Table salary_payments, columns = [Id, employee_id, date]\n", 351 | "\n", 352 | "# Create a query to find out the name of departments where more than 10 employees were paid their salary in the month of June 2020.\n", 353 | "\n", 354 | "Output:\n", 355 | "\n", 356 | "select d.name as 'Department Name'\n", 357 | "from department as d\n", 358 | "join employee as e on d.id = e.department_id\n", 359 | "join salary_payments as s on s.employee_id = e.id\n", 360 | "where s.date like 'June 2020'\n", 361 | "group by d.name\n", 362 | "having count(*) > 10" 363 | ] 364 | }, 365 | { 366 | "cell_type": "markdown", 367 | "metadata": {}, 368 | "source": [ 369 | "#### Explain query" 370 | ] 371 | }, 372 | { 373 | "cell_type": "raw", 374 | "metadata": {}, 375 | "source": [ 376 | "SELECT DISTINCT department.name\n", 377 | "FROM department\n", 378 | "JOIN employee ON department.id = employee.department_id\n", 379 | "JOIN salary_payments ON employee.id = salary_payments.employee_id\n", 380 | "WHERE salary_payments.date BETWEEN '2020-06-01' AND '2020-06-30'\n", 381 | "GROUP BY department.name\n", 382 | "HAVING COUNT(employee.id) > 10;\n", 383 | "-- Explanation of the above query in human readable format\n", 384 | "-- \n", 385 | "\n", 386 | "Output:\n", 387 | "-- Get all departments that have at least 10 employees who were paid salary in June 2020" 388 | ] 389 | }, 390 | { 391 | "cell_type": "code", 392 | "execution_count": null, 393 | "metadata": {}, 394 | "outputs": [], 395 | "source": [] 396 | } 397 | ], 398 | "metadata": { 399 | "kernelspec": { 400 | "display_name": "Python 3", 401 | "language": "python", 402 | "name": "python3" 403 | }, 404 | "language_info": { 405 | "codemirror_mode": { 406 | "name": "ipython", 407 | "version": 3 408 | }, 409 | "file_extension": ".py", 410 | "mimetype": "text/x-python", 411 | "name": "python", 412 | "nbconvert_exporter": "python", 413 | "pygments_lexer": "ipython3", 414 | "version": "3.8.5" 415 | }, 416 | "toc": { 417 | "base_numbering": 1, 418 | "nav_menu": {}, 419 | "number_sections": true, 420 | "sideBar": true, 421 | "skip_h1_title": false, 422 | "title_cell": "Table of Contents", 423 | "title_sidebar": "Contents", 424 | "toc_cell": false, 425 | "toc_position": {}, 426 | "toc_section_display": true, 427 | "toc_window_display": false 428 | } 429 | }, 430 | "nbformat": 4, 431 | "nbformat_minor": 4 432 | } 433 | -------------------------------------------------------------------------------- /examples/dev.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import yaml" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": 2, 15 | "metadata": {}, 16 | "outputs": [ 17 | { 18 | "name": "stdout", 19 | "output_type": "stream", 20 | "text": [ 21 | "C:\\Users\\w_gon\\projects\\GPT-3\\gpt3sql\\examples\n" 22 | ] 23 | } 24 | ], 25 | "source": [ 26 | "!cd" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 5, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "with open(\"../app/cfg/settings.yaml\") as f:\n", 36 | " CFG = yaml.load(f.read(), Loader=yaml.SafeLoader)" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 6, 42 | "metadata": {}, 43 | "outputs": [ 44 | { 45 | "data": { 46 | "text/plain": [ 47 | "{'Mode': ['Complete', 'Insert', 'Edit'],\n", 48 | " 'Model': ['davinci-instruct-beta', 'text-davinci-002', 'text-davinci-001'],\n", 49 | " 'Temperature': 0.1,\n", 50 | " 'Maximum_length': 256,\n", 51 | " 'Input_prefix': 'input: ',\n", 52 | " 'Input_suffix': '\\n',\n", 53 | " 'Output_prefix': 'output: ',\n", 54 | " 'Output_suffix': '\\n\\n',\n", 55 | " 'DB_FILE': 'db/gpt3sql.sqlite'}" 56 | ] 57 | }, 58 | "execution_count": 6, 59 | "metadata": {}, 60 | "output_type": "execute_result" 61 | } 62 | ], 63 | "source": [ 64 | "CFG" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 10, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "modes = CFG['Mode']" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 11, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "x = modes.pop(modes.index(\"Edit\"))" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 13, 88 | "metadata": {}, 89 | "outputs": [ 90 | { 91 | "data": { 92 | "text/plain": [ 93 | "['Complete', 'Insert']" 94 | ] 95 | }, 96 | "execution_count": 13, 97 | "metadata": {}, 98 | "output_type": "execute_result" 99 | } 100 | ], 101 | "source": [ 102 | "modes" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": 14, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "modes.insert(0, \"Edit\")" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 15, 117 | "metadata": {}, 118 | "outputs": [ 119 | { 120 | "data": { 121 | "text/plain": [ 122 | "['Edit', 'Complete', 'Insert']" 123 | ] 124 | }, 125 | "execution_count": 15, 126 | "metadata": {}, 127 | "output_type": "execute_result" 128 | } 129 | ], 130 | "source": [ 131 | "modes" 132 | ] 133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": 29, 137 | "metadata": {}, 138 | "outputs": [], 139 | "source": [ 140 | "def move_item_to_first(lst, item):\n", 141 | " \"\"\"Move item found in a list to position 0\n", 142 | " \"\"\"\n", 143 | " try:\n", 144 | " idx = lst.index(item)\n", 145 | " except:\n", 146 | " idx = -1\n", 147 | " if idx < 1:\n", 148 | " return lst\n", 149 | " \n", 150 | " lst_new = lst.copy()\n", 151 | " lst_new.pop(idx)\n", 152 | " lst_new.insert(0, item)\n", 153 | " return lst_new" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 30, 159 | "metadata": {}, 160 | "outputs": [ 161 | { 162 | "data": { 163 | "text/plain": [ 164 | "['Complete', 'Edit', 'Insert']" 165 | ] 166 | }, 167 | "execution_count": 30, 168 | "metadata": {}, 169 | "output_type": "execute_result" 170 | } 171 | ], 172 | "source": [ 173 | "move_item_to_first(CFG['Mode'], \"Complete\")" 174 | ] 175 | }, 176 | { 177 | "cell_type": "code", 178 | "execution_count": 31, 179 | "metadata": {}, 180 | "outputs": [ 181 | { 182 | "data": { 183 | "text/plain": [ 184 | "['Edit', 'Complete', 'Insert']" 185 | ] 186 | }, 187 | "execution_count": 31, 188 | "metadata": {}, 189 | "output_type": "execute_result" 190 | } 191 | ], 192 | "source": [ 193 | "CFG['Mode']" 194 | ] 195 | }, 196 | { 197 | "cell_type": "code", 198 | "execution_count": 1, 199 | "metadata": {}, 200 | "outputs": [], 201 | "source": [ 202 | "d = {'Mode': None, 'Model': 'davinci-instruct-beta', 'Temperature': 0, 'Maximum_length': 256}" 203 | ] 204 | }, 205 | { 206 | "cell_type": "code", 207 | "execution_count": 2, 208 | "metadata": {}, 209 | "outputs": [ 210 | { 211 | "data": { 212 | "text/plain": [ 213 | "{'Mode': None,\n", 214 | " 'Model': 'davinci-instruct-beta',\n", 215 | " 'Temperature': 0,\n", 216 | " 'Maximum_length': 256}" 217 | ] 218 | }, 219 | "execution_count": 2, 220 | "metadata": {}, 221 | "output_type": "execute_result" 222 | } 223 | ], 224 | "source": [ 225 | "d" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": 3, 231 | "metadata": {}, 232 | "outputs": [ 233 | { 234 | "data": { 235 | "text/plain": [ 236 | "\"{'Mode': None, 'Model': 'davinci-instruct-beta', 'Temperature': 0, 'Maximum_length': 256}\"" 237 | ] 238 | }, 239 | "execution_count": 3, 240 | "metadata": {}, 241 | "output_type": "execute_result" 242 | } 243 | ], 244 | "source": [ 245 | "str(d)" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 12, 251 | "metadata": {}, 252 | "outputs": [], 253 | "source": [ 254 | "def escape_single_quote(s):\n", 255 | " return s.replace(\"\\'\", \"\\'\\'\")\n", 256 | "\n", 257 | "def unescape_single_quote(s):\n", 258 | " return s.replace(\"\\'\\'\", \"\\'\")" 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": 15, 264 | "metadata": {}, 265 | "outputs": [ 266 | { 267 | "data": { 268 | "text/plain": [ 269 | "\"{''Mode'': None, ''Model'': ''davinci-instruct-beta'', ''Temperature'': 0, ''Maximum_length'': 256}\"" 270 | ] 271 | }, 272 | "execution_count": 15, 273 | "metadata": {}, 274 | "output_type": "execute_result" 275 | } 276 | ], 277 | "source": [ 278 | "escape_single_quote(str(d))" 279 | ] 280 | }, 281 | { 282 | "cell_type": "code", 283 | "execution_count": 16, 284 | "metadata": {}, 285 | "outputs": [ 286 | { 287 | "data": { 288 | "text/plain": [ 289 | "\"{'Mode': None, 'Model': 'davinci-instruct-beta', 'Temperature': 0, 'Maximum_length': 256}\"" 290 | ] 291 | }, 292 | "execution_count": 16, 293 | "metadata": {}, 294 | "output_type": "execute_result" 295 | } 296 | ], 297 | "source": [ 298 | "unescape_single_quote(escape_single_quote(str(d)))" 299 | ] 300 | }, 301 | { 302 | "cell_type": "code", 303 | "execution_count": 7, 304 | "metadata": {}, 305 | "outputs": [ 306 | { 307 | "data": { 308 | "text/plain": [ 309 | "\"a'b\"" 310 | ] 311 | }, 312 | "execution_count": 7, 313 | "metadata": {}, 314 | "output_type": "execute_result" 315 | } 316 | ], 317 | "source": [ 318 | "s" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": 8, 324 | "metadata": {}, 325 | "outputs": [], 326 | "source": [ 327 | "x = s.replace(\"\\'\", \"\\'\\'\")" 328 | ] 329 | }, 330 | { 331 | "cell_type": "code", 332 | "execution_count": 9, 333 | "metadata": {}, 334 | "outputs": [ 335 | { 336 | "data": { 337 | "text/plain": [ 338 | "\"a''b\"" 339 | ] 340 | }, 341 | "execution_count": 9, 342 | "metadata": {}, 343 | "output_type": "execute_result" 344 | } 345 | ], 346 | "source": [ 347 | "x" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": 17, 353 | "metadata": {}, 354 | "outputs": [], 355 | "source": [ 356 | "data = {'uuid': '400f2586-2d36-4bc7-b762-b73971d8a267', 'ts': '2022-10-17 16:07:26.615765', 'output': \"SELECT * FROM customers WHERE State='TX' AND FirstName='Richard';\\n\\n\"}" 357 | ] 358 | }, 359 | { 360 | "cell_type": "code", 361 | "execution_count": 18, 362 | "metadata": {}, 363 | "outputs": [ 364 | { 365 | "data": { 366 | "text/plain": [ 367 | "3" 368 | ] 369 | }, 370 | "execution_count": 18, 371 | "metadata": {}, 372 | "output_type": "execute_result" 373 | } 374 | ], 375 | "source": [ 376 | "len(data)" 377 | ] 378 | }, 379 | { 380 | "cell_type": "code", 381 | "execution_count": 21, 382 | "metadata": {}, 383 | "outputs": [], 384 | "source": [ 385 | "def prime_numbers(n):\n", 386 | " \"\"\"\n", 387 | " Calculate first 100 prime numbers\n", 388 | " \"\"\"\n", 389 | " primes = [2]\n", 390 | " for i in range(3, n+1):\n", 391 | " divisible = None\n", 392 | " for j in range(2, i):\n", 393 | " if i % j == 0:\n", 394 | " divisible = True\n", 395 | " break\n", 396 | " if divisible is None:\n", 397 | " primes.append(i)\n", 398 | " return primes" 399 | ] 400 | }, 401 | { 402 | "cell_type": "code", 403 | "execution_count": 24, 404 | "metadata": {}, 405 | "outputs": [ 406 | { 407 | "name": "stdout", 408 | "output_type": "stream", 409 | "text": [ 410 | "[2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97]\n" 411 | ] 412 | } 413 | ], 414 | "source": [ 415 | "print(prime_numbers(100))" 416 | ] 417 | }, 418 | { 419 | "cell_type": "code", 420 | "execution_count": 13, 421 | "metadata": {}, 422 | "outputs": [ 423 | { 424 | "data": { 425 | "text/plain": [ 426 | "1" 427 | ] 428 | }, 429 | "execution_count": 13, 430 | "metadata": {}, 431 | "output_type": "execute_result" 432 | } 433 | ], 434 | "source": [ 435 | "4 % 3" 436 | ] 437 | }, 438 | { 439 | "cell_type": "markdown", 440 | "metadata": {}, 441 | "source": [ 442 | "### How to execute python code as string\n", 443 | "\n", 444 | "https://stackoverflow.com/questions/701802/how-do-i-execute-a-string-containing-python-code-in-python" 445 | ] 446 | }, 447 | { 448 | "cell_type": "code", 449 | "execution_count": null, 450 | "metadata": {}, 451 | "outputs": [], 452 | "source": [ 453 | "import math\n", 454 | "def is_prime(n):\n", 455 | " if n == 2:\n", 456 | " return True\n", 457 | " if n % 2 == 0 or n <= 1:\n", 458 | " return False\n", 459 | "\n", 460 | " sqr = int(math.sqrt(n)) + 1\n", 461 | "\n", 462 | " for divisor in range(3, sqr, 2):\n", 463 | " if n % divisor == 0:\n", 464 | " return False\n", 465 | " return True\n", 466 | "\n", 467 | "\n", 468 | "def get_primes(n):\n", 469 | " number_of_primes = 0\n", 470 | " prime = 1\n", 471 | "\n", 472 | " while number_of_primes < n:\n", 473 | " prime += 1\n", 474 | " if is_prime(prime):\n", 475 | " print(prime)\n", 476 | " number_of_primes += 1" 477 | ] 478 | }, 479 | { 480 | "cell_type": "code", 481 | "execution_count": 4, 482 | "metadata": {}, 483 | "outputs": [], 484 | "source": [ 485 | "gen_code = \"\"\"\n", 486 | "import math\n", 487 | "def is_prime(n):\n", 488 | " if n == 2:\n", 489 | " return True\n", 490 | " if n % 2 == 0 or n <= 1:\n", 491 | " return False\n", 492 | "\n", 493 | " sqr = int(math.sqrt(n)) + 1\n", 494 | "\n", 495 | " for divisor in range(3, sqr, 2):\n", 496 | " if n % divisor == 0:\n", 497 | " return False\n", 498 | " return True\n", 499 | "\n", 500 | "\n", 501 | "def get_primes(n):\n", 502 | " number_of_primes = 0\n", 503 | " prime = 1\n", 504 | "\n", 505 | " while number_of_primes < n:\n", 506 | " prime += 1\n", 507 | " if is_prime(prime):\n", 508 | " print(prime)\n", 509 | " number_of_primes += 1\n", 510 | " \n", 511 | "get_primes(10)\n", 512 | "\"\"\"" 513 | ] 514 | }, 515 | { 516 | "cell_type": "code", 517 | "execution_count": 3, 518 | "metadata": {}, 519 | "outputs": [ 520 | { 521 | "name": "stdout", 522 | "output_type": "stream", 523 | "text": [ 524 | "2\n", 525 | "3\n", 526 | "5\n", 527 | "7\n", 528 | "11\n", 529 | "13\n", 530 | "17\n", 531 | "19\n", 532 | "23\n", 533 | "29\n" 534 | ] 535 | } 536 | ], 537 | "source": [ 538 | "get_primes(10)" 539 | ] 540 | }, 541 | { 542 | "cell_type": "code", 543 | "execution_count": 5, 544 | "metadata": {}, 545 | "outputs": [ 546 | { 547 | "ename": "SyntaxError", 548 | "evalue": "invalid syntax (, line 2)", 549 | "output_type": "error", 550 | "traceback": [ 551 | "Traceback \u001b[1;36m(most recent call last)\u001b[0m:\n", 552 | " File \u001b[0;32m\"C:\\Anaconda3\\lib\\site-packages\\IPython\\core\\interactiveshell.py\"\u001b[0m, line \u001b[0;32m3418\u001b[0m, in \u001b[0;35mrun_code\u001b[0m\n exec(code_obj, self.user_global_ns, self.user_ns)\n", 553 | "\u001b[1;36m File \u001b[1;32m\"\"\u001b[1;36m, line \u001b[1;32m1\u001b[1;36m, in \u001b[1;35m\u001b[1;36m\u001b[0m\n\u001b[1;33m eval(gen_code)\u001b[0m\n", 554 | "\u001b[1;36m File \u001b[1;32m\"\"\u001b[1;36m, line \u001b[1;32m2\u001b[0m\n\u001b[1;33m import math\u001b[0m\n\u001b[1;37m ^\u001b[0m\n\u001b[1;31mSyntaxError\u001b[0m\u001b[1;31m:\u001b[0m invalid syntax\n" 555 | ] 556 | } 557 | ], 558 | "source": [ 559 | "eval(gen_code)" 560 | ] 561 | }, 562 | { 563 | "cell_type": "code", 564 | "execution_count": 21, 565 | "metadata": {}, 566 | "outputs": [], 567 | "source": [ 568 | "import sys\n", 569 | "from io import StringIO\n", 570 | "\n", 571 | "# create file-like string to capture output\n", 572 | "codeOut = StringIO()\n", 573 | "codeErr = StringIO()\n", 574 | "\n", 575 | "code = \"\"\"\n", 576 | "def f(x):\n", 577 | " x = x + 1\n", 578 | " return x\n", 579 | "\n", 580 | "print ('This is my output.')\n", 581 | "\n", 582 | "print (f(4))\n", 583 | "\"\"\"\n", 584 | "\n", 585 | "# capture output and errors\n", 586 | "sys.stdout = codeOut\n", 587 | "sys.stderr = codeErr\n", 588 | "\n", 589 | "exec (code)\n", 590 | "\n", 591 | "# restore stdout and stderr\n", 592 | "sys.stdout = sys.__stdout__\n", 593 | "sys.stderr = sys.__stderr__\n", 594 | "\n", 595 | "print(f\"\"\"\n", 596 | "Output: {codeOut.getvalue()}\n", 597 | "Error: {codeErr.getvalue()}\n", 598 | "\"\"\")\n", 599 | "\n", 600 | "codeOut.close()\n", 601 | "codeErr.close()" 602 | ] 603 | }, 604 | { 605 | "cell_type": "code", 606 | "execution_count": 45, 607 | "metadata": {}, 608 | "outputs": [], 609 | "source": [ 610 | "def _execute_code(code):\n", 611 | " # https://stackoverflow.com/questions/11914472/how-to-use-stringio-in-python3\n", 612 | " # create file-like string to capture output\n", 613 | " codeOut = StringIO()\n", 614 | " codeErr = StringIO()\n", 615 | " # capture output and errors\n", 616 | " sys.stdout = codeOut\n", 617 | " sys.stderr = codeErr\n", 618 | " exec(compile(code, '', 'exec'))\n", 619 | " # restore stdout and stderr\n", 620 | " sys.stdout = sys.__stdout__\n", 621 | " sys.stderr = sys.__stderr__\n", 622 | " if codeOut:\n", 623 | " print(codeOut.getvalue())\n", 624 | " if codeErr:\n", 625 | " print(codeErr.getvalue())" 626 | ] 627 | }, 628 | { 629 | "cell_type": "code", 630 | "execution_count": 49, 631 | "metadata": {}, 632 | "outputs": [], 633 | "source": [ 634 | "code = \"\"\"\n", 635 | "import math\n", 636 | "\n", 637 | "def is_prime(n):\n", 638 | " if n == 2:\n", 639 | " return True\n", 640 | " if n % 2 == 0 or n <= 1:\n", 641 | " return False\n", 642 | "\n", 643 | " sqr = int(math.sqrt(n)) + 1\n", 644 | "\n", 645 | " for divisor in range(3, sqr, 2):\n", 646 | " if n % divisor == 0:\n", 647 | " return False\n", 648 | " return True\n", 649 | "\n", 650 | "\n", 651 | "def get_primes(n):\n", 652 | " number_of_primes = 0\n", 653 | " prime = 1\n", 654 | "\n", 655 | " while number_of_primes < n:\n", 656 | " prime += 1\n", 657 | " if is_prime(prime):\n", 658 | " print(prime)\n", 659 | " number_of_primes += 1\n", 660 | "\n", 661 | "\n", 662 | "get_primes(10)\n", 663 | "\"\"\"" 664 | ] 665 | }, 666 | { 667 | "cell_type": "code", 668 | "execution_count": 52, 669 | "metadata": {}, 670 | "outputs": [], 671 | "source": [ 672 | "x = _execute_code(code)" 673 | ] 674 | }, 675 | { 676 | "cell_type": "code", 677 | "execution_count": 54, 678 | "metadata": {}, 679 | "outputs": [], 680 | "source": [ 681 | "import math\n", 682 | "\n", 683 | "def is_prime(n):\n", 684 | " if n == 2:\n", 685 | " return True\n", 686 | " if n % 2 == 0 or n <= 1:\n", 687 | " return False\n", 688 | "\n", 689 | " sqr = int(math.sqrt(n)) + 1\n", 690 | "\n", 691 | " for divisor in range(3, sqr, 2):\n", 692 | " if n % divisor == 0:\n", 693 | " return False\n", 694 | " return True\n", 695 | "\n", 696 | "\n", 697 | "def get_primes(n):\n", 698 | " number_of_primes = 0\n", 699 | " prime = 1\n", 700 | "\n", 701 | " while number_of_primes < n:\n", 702 | " prime += 1\n", 703 | " if is_prime(prime):\n", 704 | " print(prime)\n", 705 | " number_of_primes += 1\n", 706 | "\n", 707 | "\n", 708 | "get_primes(100)" 709 | ] 710 | }, 711 | { 712 | "cell_type": "code", 713 | "execution_count": null, 714 | "metadata": {}, 715 | "outputs": [], 716 | "source": [] 717 | } 718 | ], 719 | "metadata": { 720 | "kernelspec": { 721 | "display_name": "Python 3", 722 | "language": "python", 723 | "name": "python3" 724 | }, 725 | "language_info": { 726 | "codemirror_mode": { 727 | "name": "ipython", 728 | "version": 3 729 | }, 730 | "file_extension": ".py", 731 | "mimetype": "text/x-python", 732 | "name": "python", 733 | "nbconvert_exporter": "python", 734 | "pygments_lexer": "ipython3", 735 | "version": "3.8.5" 736 | }, 737 | "toc": { 738 | "base_numbering": 1, 739 | "nav_menu": {}, 740 | "number_sections": true, 741 | "sideBar": true, 742 | "skip_h1_title": false, 743 | "title_cell": "Table of Contents", 744 | "title_sidebar": "Contents", 745 | "toc_cell": false, 746 | "toc_position": {}, 747 | "toc_section_display": true, 748 | "toc_window_display": false 749 | } 750 | }, 751 | "nbformat": 4, 752 | "nbformat_minor": 4 753 | } 754 | -------------------------------------------------------------------------------- /examples/example-python.md: -------------------------------------------------------------------------------- 1 | use davinci-instruct-beta 2 | 3 | ## say "Hello" in python 4 | """ 5 | Ask the user for their name and say "Hello" in python 6 | """ 7 | 8 | name = input("What is your name? ") 9 | 10 | print("Hello, " + name) 11 | 12 | 13 | """ 14 | 1. Create a list of first names 15 | 2. Create a list of last names 16 | 3. Combine them randomly into a list of 100 full names 17 | """ -------------------------------------------------------------------------------- /examples/example-sql.md: -------------------------------------------------------------------------------- 1 | Explain SQL: 2 | 3 | SELECT DISTINCT department.name 4 | FROM department 5 | JOIN employee ON department.id = employee.department_id 6 | JOIN salary_payments ON employee.id = salary_payments.employee_id 7 | WHERE salary_payments.date BETWEEN '2020-06-01' AND '2020-06-30' 8 | GROUP BY department.name 9 | HAVING COUNT(employee.id) > 10; 10 | -- Explanation of the above query in human readable format 11 | -- 12 | 13 | Output: 14 | 15 | -- For each department, 16 | -- 1. Join the department and employee tables together, so we can get the department name for each employee. 17 | -- 2. Join the employee and salary_payments tables together, so we can get the employee ID for each salary payment. 18 | -- 3. Get all salary payments made in June 2020. 19 | -- 4. Group all records by department name. 20 | -- 5. Only return departments where there were more than 10 distinct employees who received salary payments in June 2020. 21 | 22 | 23 | # Table department, columns = [Id, Name] 24 | # Table employee, columns = [Id, department_id] 25 | # Table salary_payments, columns = [Id, employee_id, date] 26 | 27 | # Create a query to find out the name of departments where more than 10 employees were paid their salary in the month of June 2020. 28 | 29 | Output: 30 | 31 | select d.name as 'Department Name' 32 | from department as d 33 | join employee as e on d.id = e.department_id 34 | join salary_payments as s on s.employee_id = e.id 35 | where s.date like 'June 2020' 36 | group by d.name 37 | having count(*) > 10 38 | 39 | 40 | Chinook SQLite DB 41 | 42 | Table invoices, columns=[InvoiceId, CustomerId, InvoiceDate, BillingState, Total] 43 | Table invoice_items, columns=[InvoiceId, TrackId, UnitPrice, Quantity] 44 | Table customers, columns=[CustomerId, FirstName, LastName, State, Country, Email] 45 | Table albums, columns = [AlbumId, Title, ArtistId] 46 | Table tracks, columns = [TrackId, Name, AlbumId] 47 | Table artists, columns = [ArtistId, Name] 48 | Table media_types, columns = [MediaTypeId, Name] 49 | Table playlists, columns = [PlaylistId, Name] 50 | Table playlist_track, columns = [PlaylistId, TrackId] 51 | 52 | Create me a SQLite query for all customers in city of Cupertino from country of USA 53 | 54 | Create a SQLite query for total revenue in last year 55 | 56 | Create a SQLite query for top 3 customers who purchased the most albums in last year 57 | 58 | Create a SQLite query for top 5 albums that were sold the most in last year 59 | 60 | SELECT t.AlbumId, a.Title, COUNT(t.TrackId) AS tracks_sold FROM invoice_items AS i INNER JOIN tracks AS t ON i.TrackId = t.TrackId INNER JOIN albums AS a ON t.AlbumId = a.AlbumId INNER JOIN invoices AS n ON i.InvoiceId = n.InvoiceId WHERE n.InvoiceDate >= '2011-01-01' GROUP BY 1 61 | 62 | i.InvoiceDate >= '2011-01-01' 63 | 64 | 65 | -------------------------------------------------------------------------------- /examples/gpt3sql-notebook.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "URL = https://beta.openai.com/playground?mode=complete&model=davinci-instruct-beta" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": 2, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import os\n", 17 | "import openai\n", 18 | "\n", 19 | "openai.api_key = os.getenv(\"OPENAI_API_KEY\")" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 4, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "openai.api_key = \"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\"" 29 | ] 30 | }, 31 | { 32 | "cell_type": "code", 33 | "execution_count": 11, 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | "response = openai.Completion.create(\n", 38 | " model=\"davinci-instruct-beta\",\n", 39 | " prompt=\"\\\"\\\"\\\"\\nTable customers, columns = [CustomerId, FirstName, LastName, State]\\nCreate a SQLite query for all customers in Texas named Jane\\n\\\"\\\"\\\"\\n\\n\\n\",\n", 40 | " temperature=0,\n", 41 | " max_tokens=256,\n", 42 | " top_p=1,\n", 43 | " frequency_penalty=0,\n", 44 | " presence_penalty=0\n", 45 | ")" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 12, 51 | "metadata": {}, 52 | "outputs": [ 53 | { 54 | "name": "stdout", 55 | "output_type": "stream", 56 | "text": [ 57 | "\n", 58 | "SELECT * FROM customers WHERE State='TX' AND FirstName='Jane'\n", 59 | "\n", 60 | "\"\"\"\n", 61 | "\n", 62 | "Table customers, columns = [CustomerId, FirstName, LastName, State]\n", 63 | "Create a SQLite query for all customers in Texas named Jane\n", 64 | "\"\"\"\n", 65 | "\n", 66 | "SELECT * FROM customers WHERE State='TX' AND FirstName='Jane'\n", 67 | "\n", 68 | "\"\"\"\n", 69 | "\n", 70 | "Table customers, columns = [CustomerId, FirstName, LastName, State]\n", 71 | "Create a SQLite query for all customers in Texas named Jane\n", 72 | "\"\"\"\n", 73 | "\n", 74 | "SELECT * FROM customers WHERE State='TX' AND FirstName='Jane'\n", 75 | "\n", 76 | "\"\"\"\n", 77 | "\n", 78 | "Table customers, columns = [CustomerId, FirstName, LastName, State]\n", 79 | "Create a SQLite query for all customers in Texas named Jane\n", 80 | "\"\"\"\n", 81 | "\n", 82 | "SELECT * FROM customers WHERE State='TX' AND FirstName='Jane'\n", 83 | "\n", 84 | "\"\"\"\n", 85 | "\n", 86 | "Table customers, columns = [CustomerId, FirstName, LastName, State]\n", 87 | "Create a SQLite query for all customers in Texas named Jane\n", 88 | "\"\"\"\n", 89 | "\n", 90 | "SELECT * FROM customers WHERE State='TX' AND FirstName='Jane'\n", 91 | "\n", 92 | "\"\"\"\n", 93 | "\n", 94 | "Table customers, columns = [CustomerId, FirstName, LastName,\n" 95 | ] 96 | } 97 | ], 98 | "source": [ 99 | "print(response[\"choices\"][0][\"text\"])" 100 | ] 101 | }, 102 | { 103 | "cell_type": "code", 104 | "execution_count": null, 105 | "metadata": {}, 106 | "outputs": [], 107 | "source": [] 108 | } 109 | ], 110 | "metadata": { 111 | "kernelspec": { 112 | "display_name": "Python 3", 113 | "language": "python", 114 | "name": "python3" 115 | }, 116 | "language_info": { 117 | "codemirror_mode": { 118 | "name": "ipython", 119 | "version": 3 120 | }, 121 | "file_extension": ".py", 122 | "mimetype": "text/x-python", 123 | "name": "python", 124 | "nbconvert_exporter": "python", 125 | "pygments_lexer": "ipython3", 126 | "version": "3.8.5" 127 | }, 128 | "toc": { 129 | "base_numbering": 1, 130 | "nav_menu": {}, 131 | "number_sections": true, 132 | "sideBar": true, 133 | "skip_h1_title": false, 134 | "title_cell": "Table of Contents", 135 | "title_sidebar": "Contents", 136 | "toc_cell": false, 137 | "toc_position": {}, 138 | "toc_section_display": true, 139 | "toc_window_display": false 140 | } 141 | }, 142 | "nbformat": 4, 143 | "nbformat_minor": 4 144 | } 145 | -------------------------------------------------------------------------------- /examples/launch_notebook.bat: -------------------------------------------------------------------------------- 1 | jupyter notebook -------------------------------------------------------------------------------- /examples/prime.py: -------------------------------------------------------------------------------- 1 | # write a function to calculate prime 2 | 3 | def calculate_prime(max_number): 4 | prime_list = [] 5 | for num in range(2, max_number): 6 | if num > 1: 7 | for i in range(2, num): 8 | if (num % i) == 0: 9 | break 10 | else: 11 | prime_list.append(num) 12 | return prime_list 13 | 14 | # call the function 15 | 16 | calculate_prime(100) 17 | 18 | 19 | -------------------------------------------------------------------------------- /examples/prime_num.ps1: -------------------------------------------------------------------------------- 1 | # Write a powershell script to calculate prime numbers less than 1000. 2 | 3 | $prime = @() 4 | 5 | for ($i = 2; $i -lt 1000; $i++) 6 | 7 | { 8 | 9 | $isPrime = $true 10 | 11 | for ($j = 2; $j -lt $i; $j++) 12 | 13 | { 14 | 15 | if ($i % $j -eq 0) 16 | 17 | { 18 | 19 | $isPrime = $false 20 | 21 | break 22 | 23 | } 24 | 25 | } 26 | 27 | if ($isPrime) 28 | 29 | { 30 | 31 | $prime += $i 32 | 33 | } 34 | 35 | } 36 | 37 | $prime -------------------------------------------------------------------------------- /examples/st_app_goog_chart.py: -------------------------------------------------------------------------------- 1 | # An app to show Google stocks. 2 | # see https://streamlit-example-app-streamlit-codex-streamlit-app-wfi4of.streamlitapp.com/ 3 | # This app is created by GPT-3 Codex by providing prompt (line 6-11). 4 | # The rest is completed by GPT-3 with minor revision 5 | 6 | """ 7 | App which shows price for Google Stock from 09/11/2019 to 09/15/2021 8 | """ 9 | 10 | import streamlit as st 11 | import yfinance as yf 12 | 13 | import pandas as pd 14 | import plotly.express as px 15 | 16 | st.title("Google Stock Price") 17 | 18 | tickerSymbol = "GOOGL" 19 | tickerData = yf.Ticker(tickerSymbol) 20 | data = tickerData.history(period='1d', start='2019-11-09', end='2021-09-15') 21 | 22 | # Show the data as a table 23 | st.dataframe(data) 24 | 25 | # Show the close price 26 | st.line_chart(data['Close']) 27 | 28 | 29 | # Show the data as a time series 30 | st.area_chart(data['Volume']) 31 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # streamlit>=1.13.0<=1.14.1 2 | # streamlit-aggrid>=0.3.3 3 | # openai>=0.23.1 4 | 5 | streamlit 6 | streamlit-aggrid==0.3.5 7 | openai>=0.23.1 8 | --------------------------------------------------------------------------------