├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE.md ├── NOTICE.md ├── README.md ├── SETUP.md ├── build_boto3_layer.sh ├── cloudformation ├── parameters │ ├── us_east_1.json │ └── us_west_2.json └── sagemaker_studio.yml ├── images ├── DnP.png ├── jupyter_notebook.png ├── jupyterlab_open.png ├── nl2sql_workshop.png ├── notebooks.png ├── rag_nl_to_sql.png ├── sm-started1.png ├── sm-started2.png ├── sm-started3.png ├── sm-started4.png ├── sm-started5.png ├── sm-started6.png ├── sm_open_jupyterlab_space.png ├── sm_studio_menu.png ├── sm_studio_new.png └── workshop_architecture.png ├── libs └── din_sql │ ├── din_sql_lib.py │ └── prompt_templates │ ├── classification_prompt.txt.jinja │ ├── clean_query_prompt.txt.jinja │ ├── easy_prompt.txt.jinja │ ├── hard_prompt.txt.jinja │ ├── medium_prompt.txt.jinja │ └── schema_linking_prompt.txt.jinja ├── module_1 ├── 01_single-table-optimized-for-latency.ipynb ├── content │ └── model-access-error.png └── diabetes.csv ├── module_2 ├── 01_din_sql.ipynb ├── 02_few_shot_text2sql.ipynb └── content │ ├── DnP.png │ └── din_sql_methodology.png ├── module_3 ├── 01_text_to_sql_rag.ipynb └── content │ └── rag.png ├── module_4 ├── 01_prevent_SQL_injection.ipynb ├── 02_prevent_prompt_injection.ipynb └── README.md ├── module_5 └── 01_Fine_Tune_Amazon_Titan.ipynb └── utilities.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.toptal.com/developers/gitignore/api/python,macos,jupyternotebooks 2 | # Edit at https://www.toptal.com/developers/gitignore?templates=python,macos,jupyternotebooks 3 | 4 | ### JupyterNotebooks ### 5 | # gitignore template for Jupyter Notebooks 6 | # website: http://jupyter.org/ 7 | 8 | .ipynb_checkpoints 9 | */.ipynb_checkpoints/* 10 | 11 | # IPython 12 | profile_default/ 13 | ipython_config.py 14 | 15 | # Remove previous ipynb_checkpoints 16 | # git rm -r .ipynb_checkpoints/ 17 | 18 | ### macOS ### 19 | # General 20 | .DS_Store 
21 | .AppleDouble 22 | .LSOverride 23 | 24 | # Icon must end with two \r 25 | Icon 26 | 27 | 28 | # Thumbnails 29 | ._* 30 | 31 | # Files that might appear in the root of a volume 32 | .DocumentRevisions-V100 33 | .fseventsd 34 | .Spotlight-V100 35 | .TemporaryItems 36 | .Trashes 37 | .VolumeIcon.icns 38 | .com.apple.timemachine.donotpresent 39 | 40 | # Directories potentially created on remote AFP share 41 | .AppleDB 42 | .AppleDesktop 43 | Network Trash Folder 44 | Temporary Items 45 | .apdisk 46 | 47 | ### macOS Patch ### 48 | # iCloud generated files 49 | *.icloud 50 | 51 | ### Python ### 52 | # Byte-compiled / optimized / DLL files 53 | __pycache__/ 54 | */*__pycache__ 55 | *.py[cod] 56 | *$py.class 57 | 58 | # C extensions 59 | *.so 60 | 61 | # Distribution / packaging 62 | .Python 63 | build/ 64 | develop-eggs/ 65 | dist/ 66 | downloads/ 67 | eggs/ 68 | .eggs/ 69 | lib/ 70 | lib64/ 71 | parts/ 72 | sdist/ 73 | var/ 74 | wheels/ 75 | share/python-wheels/ 76 | *.egg-info/ 77 | .installed.cfg 78 | *.egg 79 | MANIFEST 80 | 81 | # PyInstaller 82 | # Usually these files are written by a python script from a template 83 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
84 | *.manifest 85 | *.spec 86 | 87 | # Installer logs 88 | pip-log.txt 89 | pip-delete-this-directory.txt 90 | 91 | # Unit test / coverage reports 92 | htmlcov/ 93 | .tox/ 94 | .nox/ 95 | .coverage 96 | .coverage.* 97 | .cache 98 | nosetests.xml 99 | coverage.xml 100 | *.cover 101 | *.py,cover 102 | .hypothesis/ 103 | .pytest_cache/ 104 | cover/ 105 | 106 | # Translations 107 | *.mo 108 | *.pot 109 | vectorstore/ 110 | 111 | # Django stuff: 112 | *.log 113 | local_settings.py 114 | db.sqlite3 115 | db.sqlite3-journal 116 | 117 | # Flask stuff: 118 | instance/ 119 | .webassets-cache 120 | 121 | # Scrapy stuff: 122 | .scrapy 123 | 124 | # Sphinx documentation 125 | docs/_build/ 126 | 127 | # PyBuilder 128 | .pybuilder/ 129 | target/ 130 | 131 | # Jupyter Notebook 132 | 133 | # IPython 134 | 135 | # pyenv 136 | # For a library or package, you might want to ignore these files since the code is 137 | # intended to run in multiple environments; otherwise, check them in: 138 | # .python-version 139 | 140 | # pipenv 141 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 142 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 143 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 144 | # install all needed dependencies. 145 | #Pipfile.lock 146 | 147 | # poetry 148 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 149 | # This is especially recommended for binary packages to ensure reproducibility, and is more 150 | # commonly ignored for libraries. 151 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 152 | #poetry.lock 153 | 154 | # pdm 155 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
156 | #pdm.lock 157 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 158 | # in version control. 159 | # https://pdm.fming.dev/#use-with-ide 160 | .pdm.toml 161 | 162 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 163 | __pypackages__/ 164 | 165 | # Celery stuff 166 | celerybeat-schedule 167 | celerybeat.pid 168 | 169 | # SageMath parsed files 170 | *.sage.py 171 | 172 | # Environments 173 | .env 174 | .venv 175 | env/ 176 | venv/ 177 | ENV/ 178 | env.bak/ 179 | venv.bak/ 180 | 181 | # Spyder project settings 182 | .spyderproject 183 | .spyproject 184 | 185 | # Rope project settings 186 | .ropeproject 187 | 188 | # mkdocs documentation 189 | /site 190 | 191 | # mypy 192 | .mypy_cache/ 193 | .dmypy.json 194 | dmypy.json 195 | 196 | # Pyre type checker 197 | .pyre/ 198 | 199 | # pytype static type analyzer 200 | .pytype/ 201 | 202 | # Cython debug symbols 203 | cython_debug/ 204 | 205 | # PyCharm 206 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 207 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 208 | # and can be added to the global gitignore or merged into this file. For a more nuclear 209 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
210 | #.idea/ 211 | 212 | ### Python Patch ### 213 | # Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration 214 | poetry.toml 215 | 216 | # ruff 217 | .ruff_cache/ 218 | 219 | # LSP config files 220 | pyrightconfig.json 221 | 222 | # ignore notebook outputs 223 | **question*.json 224 | 225 | # ignore layers artifacts 226 | cloudformation/layers/* 227 | 228 | # End of https://www.toptal.com/developers/gitignore/api/python,macos,jupyternotebooks -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, website page, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | ## Expected Notebook Structure 10 | 11 | If you're contributing a net-new workbook, it should include both a Jupyter notebook and a corresponding update to our readme. Additionally, for both notebook updates and new releases, here are the sections that should be completed in the notebook: 12 | 13 | 1. 
Overview: Describe the problem and how this notebook solves it. 14 | 2. Context or Details about feature/use case: Reference any public works or arXiv papers associated with the concepts discussed in this notebook. 15 | 3. Prerequisites: Pip install any dependencies **with their version numbers!**. 16 | 4. Setup: describe with code and descriptions what's required in order to begin using the notebook. 17 | 5. Your code with comments. Comments should explain what the code is doing. 18 | 6. Other Considerations or Advanced section or Best Practices 19 | 7. Next Steps 20 | 8. Cleanup: delete all resources created in the notebook 21 | 22 | Be sure to save the notebook in the appropriate module folder, e.g., `repo/module_2/your_notebook.ipynb`. 23 | 24 | ## Reporting Bugs/Feature Requests 25 | 26 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 27 | 28 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 29 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 30 | 31 | * A reproducible test case or series of steps 32 | * The version of our code being used 33 | * Any modifications you've made relevant to the bug 34 | * Anything unusual about your environment or deployment 35 | 36 | 37 | ## Contributing via Pull Requests 38 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 39 | 40 | 1. You are working against the latest source on the *main* branch. 41 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 42 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 43 | 44 | To send us a pull request, please: 45 | 46 | 1. Fork the repository. 47 | 2. Modify the source; please focus on the specific change you are contributing. 
If you also reformat all the code, it will be hard for us to focus on your change. 48 | 3. Ensure local tests pass. 49 | 4. Commit to your fork using clear commit messages. 50 | 5. Send us a pull request, answering any default questions in the pull request interface. 51 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 52 | 53 | GitHub provides additional documentation on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 54 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 55 | 56 | 57 | ## Finding contributions to work on 58 | Looking at the existing issues is a great way to find something to contribute to. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 59 | 60 | 61 | ## Code of Conduct 62 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 63 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 64 | opensource-codeofconduct@amazon.com with any additional questions or comments. 65 | 66 | 67 | ## Security issue notifications 68 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public GitHub issue. 69 | 70 | 71 | ## Licensing 72 | 73 | See the [LICENSE](LICENSE.md) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 74 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this 4 | software and associated documentation files (the "Software"), to deal in the Software 5 | without restriction, including without limitation the rights to use, copy, modify, 6 | merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 7 | permit persons to whom the Software is furnished to do so. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 10 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 11 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 12 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 13 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 14 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /NOTICE.md: -------------------------------------------------------------------------------- 1 | AWS Workshop for Natural Language to SQL 2 | Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Overview of Natural Language to SQL 2 | 3 | Enterprise data warehouses represent many of the largest technology investments for companies across all industries in the past 20 years. While generative AI has shown a lot of promise in creating novel content and comprehending large corpora of information in unstructured format, how will it improve consumption of the data organizations have invested so much in making useful? These data sources are among the most trusted in an organization and drive decisions at the highest levels of leadership in many cases. 
4 | 5 | Since its inception in the 1970s, Structured Query Language (SQL) has been the most ubiquitous language to interact with databases, but one still needs a deep understanding of set theory, data types, and foreign key relationships in order to make sense of the data. Generative AI offers a way to bridge this knowledge and skills gap by translating natural language questions into a valid SQL query. 6 | 7 | ### Personas 8 | The systems and people standing to benefit from this access pattern to databases include non-technical folks looking to incorporate relational data sources into their process, like customer service agents and call-center associates. Further, technical use cases include Extract-Transform-Load pipelines, existing Retrieval Augmented Generation (RAG) architectures that integrate relational databases, and organizations who are dealing with a data platform too big to reasonably navigate in isolation. 9 | 10 | ### The Problem 11 | The hardest components of creating an accurate SQL query out of natural language are the same ones we might have struggled with as newcomers to the language. Concepts like identifying foreign key relationships, breaking down the question into smaller, nested queries, and properly joining tables, are among the hardest components of SQL query generation. According to researchers, over 50% of SQL generation tests fail on schema linking and joins alone. 12 | 13 | On top of these core components of the query, each database engine has its own syntax that may require mastery in order to write a valid query. Further, in many organizations, there are many overlapping data attributes - a value is aggregated in one table and not aggregated in another, for example - as well as abbreviated column names that require tribal knowledge to use correctly. 14 | 15 | ### Measuring Success 16 | So how close are we to solving this problem? 
The community has coalesced around two main leaderboards that rank the most successful approaches with labeled data sets: [Spider](https://yale-lily.github.io/spider) and [BIRD](https://bird-bench.github.io/). Both leaderboards prioritize the most important metric for measuring the accuracy of any given approach to solving this problem, called Execution Accuracy (EX). This metric simply compares the generated SQL query to the labeled SQL query to determine if it's a match or not. Further, Spider measures Exact Set Match Accuracy (EM) – did the returned result set actually answer the question, regardless of how the query was written – and BIRD offers Valid Efficiency Score (VES), a measure of how performant the generated SQL query is. You can read more about each benchmark data set on their respective pages. 17 | 18 | The Spider and BIRD datasets have proven to be authoritative, robust data sets to benchmark Text-to-SQL techniques, and even fine-tune models with. Throughout this module we will refer to these datasets and their corresponding leaderboards to demonstrate the most robust approaches to Text-to-SQL. 19 | 20 | ### State of the Art 21 | According to the BIRD leaderboard, the state of the art for the Text-to-SQL problem sits at 60% Execution Accuracy. While that’s still well short of human performance, note that in one year we've moved from the baseline T5 model performing at 7% EM to a year later seeing EM exceed 60%. We’re excited to see how this further improves in the coming year as these models and techniques continue to be researched. 22 | 23 | It's important to note these techniques are optimized for a single thing, which is generating the correct SQL query. These leaderboards don't assess some critical aspects of these techniques, most importantly speed. Many of these techniques demonstrate an end-to-end prompt chain speed of well over a few seconds, which many zero-shot business intelligence use cases can't tolerate. 
Additionally, many of them also make multiple inferences to an LLM to complete the necessary reasoning, which can drive up the cost per query considerably. 24 | 25 | ### Workshop Content 26 | This workshop is designed to be a progression of Text-to-SQL techniques, starting with robust prompt engineering. All code is in the form of Jupyter Notebooks, hosted in SageMaker Studio. When you're ready to get started, head over to [Setup](./SETUP.md) to begin deployment of the necessary resources for this workshop. 27 | 28 | 29 | Below is an outline of the workshop content: 30 | 31 | * **Module 1: Single-Table Langchain, Optimized for Latency.** We use Amazon Bedrock and Langchain's [SQLDatabase Toolkit](https://python.langchain.com/v0.2/docs/integrations/tools/sql_database/) to query a biomedical dataset. We show here how to minimize latency when the schema is relatively straightforward. 32 | * **Module 2: Advanced Prompt Engineering for Text-to-SQL.** Use Amazon Bedrock to implement some of the State-of-the-Art techniques against an Amazon Athena data set and a relational database. 33 | * **Module 3: Retrieval Augmented Generation (RAG) for Text-to-SQL.** Leverage a FAISS in-memory vector store of data set meta data to improve query accuracy. 34 | * **Module 4: Introduction to Security for Text-to-SQL.** Guard against prompt injection and SQL injection using prompt engineering techniques. 35 | * **Module 5: Fine-tuning for Text-to-SQL.** Fine-tune a Titan model on the Spider Dataset to improve Text-to-SQL accuracy. 36 | -------------------------------------------------------------------------------- /SETUP.md: -------------------------------------------------------------------------------- 1 | > :warning: **This repository is not intended for production use**: The code found here is for demonstration purposes only and not to be used in a production setting! 
2 | 3 | # Text-to-SQL Workshop 4 | This workshop was built for those who wish to have a deeper understanding of Generative AI in the context of interacting with a relational data store, such as a database or a data lake. This workshop is divided into modules that each build on the previous while introducing a new technique to solve this problem. Many of these approaches are based on existing work from the community and cited accordingly. 5 | 6 | 7 | See below for architecture. 8 | 9 | ![Workshop Architecture](/images/workshop_architecture.png "Workshop Architecture") 10 | 11 | ## Account Limits 12 | Note this solution will deploy a VPC in your account. The default account limit for number of VPCs is 5. [Request an increase to this quota](https://docs.aws.amazon.com/servicequotas/latest/userguide/request-quota-increase.html) if you will cross that threshold with this deployment. 13 | 14 | ## Supported Regions 15 | This workshop can be deployed in `us-west-2` or `us-east-1`. If you deploy in any other region, the CloudFormation stack will fail to deploy. 16 | 17 | ## Deploy Lambda Layer 18 | This solution requires a version of Boto3 >= 1.3 19 | 1. **Package Boto3 as Lambda Layer.** Run the `build_boto3_layer.sh` script to package the boto3 library into a zip. 20 | 1. **Verify Package Created.** Locate the `boto3.zip` file in the `cloudformation/layers` folder of this repository. 21 | 1. **Upload the Zip File to an S3 Location.** Package the Boto3 library into a zip file named boto3.zip. Then, upload this zip file to an Amazon S3 bucket of your choosing. This S3 bucket acts as a storage location from which AWS Lambda can access the Boto3 library. 22 | * Why It's Important: AWS Lambda layers are used to include additional code and content, such as libraries, dependencies, or custom code, in your Lambda function's execution environment. 
By uploading the boto3.zip file to S3, you're preparing to create a Lambda layer that includes the Boto3 library, which is essential for the AWS SDK for Python. This enables your Lambda functions to interact with AWS services. 23 | * Requirement for s3:GetObject: The AWS account that will deploy the CloudFormation stack must have permissions to access (s3:GetObject) the uploaded boto3.zip file. This permission ensures that when you specify the S3 bucket and object key in the CloudFormation template or parameters, AWS can retrieve the zip file to create the Lambda layer. 24 | 1. **Update the CloudFormation Parameters.** Modify your CloudFormation stack's parameters to include the name of the S3 bucket (LayersBucket) where you've uploaded the boto3.zip file, and the object key (Boto3LayerS3ObjKey) that uniquely identifies the file within the bucket. This is typically done in a parameters JSON file that you pass to CloudFormation during the stack creation or update process. 25 | * Why It's Important: CloudFormation templates can dynamically accept input parameters at runtime. By specifying the LayersBucket and Boto3LayerS3ObjKey, you're telling CloudFormation where to find the Boto3 library zip file for the Lambda layer. This step is crucial for successfully deploying the stack with all its required components, including any Lambda functions that depend on the Boto3 layer. 26 | * Parameter Overrides Example: When deploying your CloudFormation stack using the AWS CLI, you might use a command like this, where `us_west_2.json` is your parameters file: 27 | * **Be sure to update the DBPassword and DBUser values or this stack will not deploy.** 28 | ``` 29 | { 30 | "Parameters": { 31 | "DBPassword": "passwordfordatabase", 32 | "DBUser": "userfordatabase", 33 | "LayersBucket": "bucketname", 34 | "Boto3LayerS3ObjKey": "boto3.zip" 35 | } 36 | } 37 | ``` 38 | 39 | ## Deploy Infrastructure with AWS CLI 40 | This template requires use of an S3 bucket given its size. 
41 | ``` 42 | aws cloudformation deploy \ 43 | --stack-name txt2sql \ 44 | --region us-west-2 \ 45 | --template-file ./cloudformation/sagemaker_studio.yml \ 46 | --capabilities CAPABILITY_NAMED_IAM \ 47 | --parameter-overrides file://cloudformation/parameters/us_west_2.json \ 48 | --s3-bucket bucket-to-hold-cfn-template 49 | ``` 50 | 51 | ## Deploy Infrastructure using the Console 52 | To deploy this template using the AWS Console only, [follow the instructions here](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/cfn-console-create-stack.html) by uploading the template found in the `cloudformation` folder named `sagemaker_studio.yml`. 53 | 54 | Be sure to update the parameters for template when deploying in console [as described here](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/cfn-using-console-create-stack-parameters.html). You will need to update the following: 55 | * DBPassword 56 | * DBUser 57 | * LayersBucket 58 | * Boto3LayerS3ObjKey 59 | 60 | Note that the template can take up to 20 minutes to deploy. 61 | 62 | 63 | ## Amazon SageMaker Studio Access 64 | 65 | Amazon SageMaker Studio is a web-based, integrated development environment (IDE) for machine learning that lets you 66 | build, train, debug, deploy, and monitor your machine learning models. Studio provides all the tools you need to take 67 | your models from experimentation to production while boosting your productivity. 68 | 69 | 1. Open the AWS Management Console and switch to AWS region communicated by your instructor. 70 | 71 | 2. Under Services search for Amazon SageMaker. Once there, click on `Studio` on the left menu. 72 | 73 | ![sm-started1](/images/sm-started1.png) 74 | ![sm_studio_menu](/images/sm_studio_menu.png) 75 | 76 | 3. From the drop down under "Get Started" you should see your workshop populated with a user profile of `workshop-user`. Click "Open Studio" to open Sagemaker Studio. 77 | 78 | ![sm-started2](/images/sm-started2.png) 79 | 80 | 4. 
You will be redirected to a new web tab that looks like this. Click on "View JupyterLab spaces". 81 | 82 | **You are now ready to begin!** 83 | 84 | -------------------------------------------------------------------------------- /build_boto3_layer.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pip install boto3 -t cloudformation/layers/python 4 | cd cloudformation/layers 5 | zip -r boto3.zip python -------------------------------------------------------------------------------- /cloudformation/parameters/us_east_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "Parameters": { 3 | "DBPassword": "", 4 | "DBUser": "", 5 | "LayersBucket": "", 6 | "Boto3LayerS3ObjKey": "boto3.zip" 7 | } 8 | } -------------------------------------------------------------------------------- /cloudformation/parameters/us_west_2.json: -------------------------------------------------------------------------------- 1 | { 2 | "Parameters": { 3 | "DBPassword": "", 4 | "DBUser": "", 5 | "LayersBucket": "", 6 | "Boto3LayerS3ObjKey": "boto3.zip" 7 | } 8 | } -------------------------------------------------------------------------------- /images/DnP.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/text-to-sql-bedrock-workshop/8c6844c5a268092c73516a016353c07fe1f146b0/images/DnP.png -------------------------------------------------------------------------------- /images/jupyter_notebook.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/text-to-sql-bedrock-workshop/8c6844c5a268092c73516a016353c07fe1f146b0/images/jupyter_notebook.png -------------------------------------------------------------------------------- /images/jupyterlab_open.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/text-to-sql-bedrock-workshop/8c6844c5a268092c73516a016353c07fe1f146b0/images/jupyterlab_open.png -------------------------------------------------------------------------------- /images/nl2sql_workshop.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/text-to-sql-bedrock-workshop/8c6844c5a268092c73516a016353c07fe1f146b0/images/nl2sql_workshop.png -------------------------------------------------------------------------------- /images/notebooks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/text-to-sql-bedrock-workshop/8c6844c5a268092c73516a016353c07fe1f146b0/images/notebooks.png -------------------------------------------------------------------------------- /images/rag_nl_to_sql.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/text-to-sql-bedrock-workshop/8c6844c5a268092c73516a016353c07fe1f146b0/images/rag_nl_to_sql.png -------------------------------------------------------------------------------- /images/sm-started1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/text-to-sql-bedrock-workshop/8c6844c5a268092c73516a016353c07fe1f146b0/images/sm-started1.png -------------------------------------------------------------------------------- /images/sm-started2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/text-to-sql-bedrock-workshop/8c6844c5a268092c73516a016353c07fe1f146b0/images/sm-started2.png -------------------------------------------------------------------------------- /images/sm-started3.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/aws-samples/text-to-sql-bedrock-workshop/8c6844c5a268092c73516a016353c07fe1f146b0/images/sm-started3.png -------------------------------------------------------------------------------- /images/sm-started4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/text-to-sql-bedrock-workshop/8c6844c5a268092c73516a016353c07fe1f146b0/images/sm-started4.png -------------------------------------------------------------------------------- /images/sm-started5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/text-to-sql-bedrock-workshop/8c6844c5a268092c73516a016353c07fe1f146b0/images/sm-started5.png -------------------------------------------------------------------------------- /images/sm-started6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/text-to-sql-bedrock-workshop/8c6844c5a268092c73516a016353c07fe1f146b0/images/sm-started6.png -------------------------------------------------------------------------------- /images/sm_open_jupyterlab_space.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/text-to-sql-bedrock-workshop/8c6844c5a268092c73516a016353c07fe1f146b0/images/sm_open_jupyterlab_space.png -------------------------------------------------------------------------------- /images/sm_studio_menu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/text-to-sql-bedrock-workshop/8c6844c5a268092c73516a016353c07fe1f146b0/images/sm_studio_menu.png -------------------------------------------------------------------------------- /images/sm_studio_new.png: -------------------------------------------------------------------------------- 
"""DIN-SQL text-to-SQL helpers backed by Amazon Bedrock (libs/din_sql/din_sql_lib.py).

The module exposes a single class, :class:`DIN_SQL`, which connects to a
database through SQLAlchemy, renders the Jinja prompt templates found in the
``prompt_templates`` directory next to this file, and invokes a Bedrock model
to generate, debug and (on failure) revise SQL statements.
"""
import sys
import json
import boto3
import sqlalchemy as sa
import logging
import botocore
import jinja2 as j
import os

# TODO
# prune imports

# initialize logger
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
formatter = logging.Formatter('%(asctime)s:%(levelname)s:%(message)s')
handler = logging.StreamHandler(sys.stdout)
# The formatter was previously created but never attached, so it had no effect.
handler.setFormatter(formatter)
logger.addHandler(handler)
current_dir = os.path.dirname(__file__)

# ANTHROPIC_CLIENT = Anthropic()
# NOTE(review): ('jinja') is a plain string, not a one-element tuple, so the
# ".jinja" extension is presumably never matched and autoescaping stays off
# for the file templates. That is what SQL prompts need (HTML escaping would
# rewrite quotes and ">"), so confirm before "fixing" it to ('jinja',).
JINJA_ENV = j.Environment(
    loader=j.FileSystemLoader(f"{current_dir}/prompt_templates"),
    autoescape=j.select_autoescape(
        enabled_extensions=('jinja'),
        default_for_string=True,
    )
)


class DIN_SQL:
    """Generate SQL from natural-language questions with a Bedrock model.

    Typical flow: connect with :meth:`athena_connect` or :meth:`db_connect`,
    build prompts with the ``*_prompt_maker`` methods, call :meth:`get_sql`
    to obtain a statement and :meth:`query` to execute it.
    """

    def __init__(self, bedrock_model_id):
        """Create the Bedrock runtime client and load the prompt templates.

        :param bedrock_model_id: Bedrock model identifier, e.g. "anthropic.claude-v2".
        """
        self.db_un = None
        self.db_pwd = None
        self.db_host = None
        self.db_port = None
        self.db_name = None
        self.db_engine = None
        self.db_connection = None
        self.db_engine_obj = None
        self.sql_dialect = None
        self.model_id = bedrock_model_id  # "anthropic.claude-v2"
        self.max_tokens_to_sample = 8000
        self.token_summary = {
            "input_tokens": 0,
            "output_tokens": 0,
        }

        self.bedrock_runtime_boto3_client = boto3.client(
            service_name='bedrock-runtime',
        )

        # prompts
        # NOTE(review): the four tag values are empty strings in this copy of
        # the file; if they are meant to be XML-style markers, confirm the
        # intended values against the repository.
        self.example_tag_start = ''
        self.example_tag_end = ''
        self.instructions_tag_start = ''
        self.instructions_tag_end = ''
        self.schema_linking_prompt = JINJA_ENV.get_template('schema_linking_prompt.txt.jinja')
        self.classification_prompt = JINJA_ENV.get_template('classification_prompt.txt.jinja')
        self.easy_prompt = JINJA_ENV.get_template('easy_prompt.txt.jinja')
        self.medium_prompt = JINJA_ENV.get_template('medium_prompt.txt.jinja')
        self.hard_prompt = JINJA_ENV.get_template('hard_prompt.txt.jinja')
        self.clean_query_prompt = JINJA_ENV.get_template('clean_query_prompt.txt.jinja')

    # ------------------------------------------------------------------
    # connections
    # ------------------------------------------------------------------
    def athena_connect(self, catalog_name, db_name, s3_prefix, region=None):
        """Connect to an Athena database and remember the connection.

        :param catalog_name: the name of the catalog to connect to
        :param db_name: the name of the database to connect to
        :param s3_prefix: the prefix of the s3 bucket to use for storing athena results
        :param region: AWS region; defaults to the region of the Bedrock client

        Connection failures are logged, not raised; ``self.db_connection``
        is then left unchanged.
        """
        region = region or self.bedrock_runtime_boto3_client.meta.region_name
        athena_connection_str = (
            f'awsathena+rest://:@athena.{region}.amazonaws.com:443/{db_name}'
            f'?s3_staging_dir=s3://{s3_prefix}&catalog_name={catalog_name}'
        )
        try:
            logger.info(f"attempting to connect to athena database with connection string: {athena_connection_str}")
            athena_engine = sa.create_engine(athena_connection_str)
            self.db_connection = athena_engine.connect()
            self.db_engine_obj = athena_engine
            self.sql_dialect = 'presto'
            logger.info("connected to database successfully.")
        except sa.exc.SQLAlchemyError as e:
            logger.error(f"SQLAlchemy error: {e}")

    def db_connect(self, db_un, db_pwd, db_host, db_port, db_name, db_engine):
        """Connect to a database through a SQLAlchemy URI built from the parts.

        :param db_un: database user name
        :param db_pwd: database password
        :param db_host: database host
        :param db_port: database port
        :param db_name: database name
        :param db_engine: SQLAlchemy driver name; also stored as the SQL dialect

        Connection failures are logged, not raised.
        """
        self.db_un = db_un
        self.db_pwd = db_pwd
        self.db_host = db_host
        self.db_port = db_port
        self.db_name = db_name
        self.db_engine = db_engine
        self.sql_dialect = db_engine
        database_uri = f"{self.db_engine}://{self.db_un}:{self.db_pwd}@{self.db_host}:{self.db_port}/{self.db_name}"
        # Never write the password to the log: log a masked copy of the URI.
        masked_uri = f"{self.db_engine}://{self.db_un}:***@{self.db_host}:{self.db_port}/{self.db_name}"
        logger.info(f"attempting to connect to database with uri: {masked_uri}")
        try:
            db_engine_obj = sa.create_engine(
                url=database_uri
            )
            self.db_connection = db_engine_obj.connect()
            self.db_engine_obj = db_engine_obj
            logger.info("connected to database successfully.")
        except sa.exc.SQLAlchemyError as e:
            logger.error(f"SQLAlchemy error: {e}")

    def reset_token_calculator(self):
        """Reset the input/output token counters to zero."""
        self.token_summary = {
            "input_tokens": 0,
            "output_tokens": 0,
        }

    # ------------------------------------------------------------------
    # query execution
    # ------------------------------------------------------------------
    def query(self, sql_string):
        """Execute a query, asking the model for one revision if it fails.

        :param sql_string: the sql string to be executed
        :return: the list of result rows, or a string starting with
            "SQLAlchemy error:" when the statement could not be executed
            even after revision.
        """
        try:
            logger.info(f"attempting to execute query: \n{sql_string}")
            logger.info(f"cleaned SQL: \n{sa.text(sql_string)}")
            result = self.db_connection.execute(sa.text(sql_string))
            return result.all()
        except sa.exc.SQLAlchemyError as e:
            db_error = e

        logger.warning(f"Encountered SQLAlchemy error: {db_error}. Attempting to remediate.")
        try:
            revised_sql = self.revise_query_with_error(
                sql_query=sql_string,
                error_message=db_error,
                sql_tag_start='```sql',
                sql_tag_end='```'
            )
        except ValueError as e:
            # The model gave no usable revision; report the original failure.
            logger.error(f"Could not obtain a revised query: {e}")
            return f"SQLAlchemy error: {db_error}"

        try:
            logger.info(f"revised SQL: \n{sa.text(revised_sql)}")
            new_result = self.db_connection.execute(sa.text(revised_sql))
            return new_result.all()
        except sa.exc.SQLAlchemyError as e:
            logger.error(f"SQLAlchemy error on revised query: {e}")
            return f"SQLAlchemy error: {e}"

    # ------------------------------------------------------------------
    # prompt construction
    # ------------------------------------------------------------------
    def bedrock_claude_prompt_maker(self, prompt):
        """Wrap a prompt in the Human/Assistant markers for Claude models.

        :param prompt: the prompt to be modified
        :return: the wrapped prompt when the model id starts with
            "anthropic.claude", otherwise the prompt unchanged
        """
        if self.model_id.startswith("anthropic.claude"):
            return f"\n\nHuman: {prompt}\n\nAssistant: "
        return prompt

    def _tag_kwargs(self):
        """Return the instruction/example tag arguments shared by all templates."""
        return {
            "instruction_tag_start": self.instructions_tag_start,
            "instruction_tag_end": self.instructions_tag_end,
            "example_tag_start": self.example_tag_start,
            "example_tag_end": self.example_tag_end,
        }

    def hard_prompt_maker(self, test_sample_text, database, schema_links, sub_questions, sql_tag_start='```sql', sql_tag_end='```'):
        """Create the hard (nested query) prompt for a given database.

        :param test_sample_text: The natural language question
        :param database: The database name to inspect
        :param schema_links: The schema links found for the question
        :param sub_questions: The sub-question(s) the nested query depends on
        :param sql_tag_start: The start tag for SQL in the prompt
        :param sql_tag_end: The end tag for SQL in the prompt
        :return: The hard prompt.
        :note word_in_mouth for claude is: A: Let's think step by step. "{question}" can be solved by knowing the answer to the following sub-question "{sub_questions}". The SQL query for the sub-question "
        """
        return self.hard_prompt.render(
            fields=self.find_fields(db_name=database),
            foreign_keys=self.find_foreign_keys(database),
            schema_links=schema_links,
            test_sample_text=test_sample_text,
            sub_questions=sub_questions,
            sql_tag_start=sql_tag_start,
            sql_tag_end=sql_tag_end,
            **self._tag_kwargs()
        )

    def medium_prompt_maker(self, test_sample_text, database, schema_links, sql_tag_start='```sql', sql_tag_end='```'):
        """Create the medium (JOIN, non-nested) prompt for a given database.

        :param test_sample_text: The natural language question
        :param database: The database name to inspect
        :param schema_links: The schema links found for the question
        :param sql_tag_start: The start tag for SQL in the prompt
        :param sql_tag_end: The end tag for SQL in the prompt
        :return: The medium prompt.
        :note word_in_mouth for claude is: SQL: {sql_tag_start}
        """
        return self.medium_prompt.render(
            fields=self.find_fields(db_name=database),
            foreign_keys=self.find_foreign_keys(database),
            schema_links=schema_links,
            test_sample_text=test_sample_text,
            sql_tag_start=sql_tag_start,
            sql_tag_end=sql_tag_end,
            **self._tag_kwargs()
        )

    def easy_prompt_maker(self, test_sample_text, database, schema_links, sql_tag_start='```sql', sql_tag_end='```'):
        """Create the easy (single table) prompt for a given database.

        :param test_sample_text: The natural language question
        :param database: The database name to inspect
        :param schema_links: The schema links found for the question
        :param sql_tag_start: The start tag for SQL in the prompt
        :param sql_tag_end: The end tag for SQL in the prompt
        :return: The easy prompt.
        :note word_in_mouth for claude is: SQL: {sql_tag_start}
        """
        return self.easy_prompt.render(
            fields=self.find_fields(db_name=database),
            schema_links=schema_links,
            test_sample_text=test_sample_text,
            sql_tag_start=sql_tag_start,
            sql_tag_end=sql_tag_end,
            **self._tag_kwargs()
        )

    def classification_prompt_maker(self, test_sample_text, database, schema_links):
        """Create the classification prompt for a given database.

        :param test_sample_text: The natural language question
        :param database: The database name to inspect
        :param schema_links: The schema links found for the question
        :return: The classification prompt.
        :note: word_in_mouth for claude here is: A: Let’s think step by step.
        """
        return self.classification_prompt.render(
            fields=self.find_fields(db_name=database),
            foreign_keys=self.find_foreign_keys(database),
            schema_links=schema_links,
            test_sample_text=test_sample_text,
            classification_start='',
            # Passed explicitly; the template references it and an undefined
            # variable would render as an empty string anyway.
            classification_end='',
            **self._tag_kwargs()
        )

    def schema_linking_prompt_maker(self, test_sample_text, database):
        """Create the schema linking prompt for a given database.

        :param test_sample_text: The natural language question
        :param database: The database name to inspect
        :return: The schema linking prompt.
        :note word_in_mouth for claude here is: A. Let’s think step by step. In the question "{question}", we are asked:
        """
        return self.schema_linking_prompt.render(
            fields=self.find_fields(db_name=database),
            foreign_keys=self.find_foreign_keys(database),
            test_sample_text=test_sample_text,
            schema_links_start='',
            schema_links_end='',
            **self._tag_kwargs()
        )

    # ------------------------------------------------------------------
    # schema inspection
    # ------------------------------------------------------------------
    @staticmethod
    def _select_schemas(schemas, db_name):
        """Return [db_name] when it is a known schema, else every schema except information_schema."""
        if db_name and db_name in schemas:
            return [db_name]
        return [schema for schema in schemas if schema != 'information_schema']

    def find_foreign_keys(self, db_name):
        """Find the foreign keys of a given database.

        :param db_name: The name of the database (schema). When empty or not
            found, every schema except ``information_schema`` is inspected.
        :return: A string such as "[a.x = b.y,c.z = d.w]", or "[]" when no
            foreign keys exist. Composite keys contribute one entry per
            column pair.
        """
        inspector = sa.inspect(self.db_connection)
        links = []
        for schema in self._select_schemas(inspector.get_schema_names(), db_name):
            for table_name in inspector.get_table_names(schema=schema):
                # schema= is required here; without it the lookup runs
                # against the connection's default schema.
                for fk in inspector.get_foreign_keys(table_name, schema=schema):
                    for local_col, remote_col in zip(fk['constrained_columns'], fk['referred_columns']):
                        links.append(f"{table_name}.{local_col} = {fk['referred_table']}.{remote_col}")
        return "[" + ",".join(links) + "]"

    def find_fields(self, db_name=None):
        """Find the tables and columns of a given database.

        :param db_name: The name of the database (schema). When empty or not
            found, every schema except ``information_schema`` is inspected.
        :return: One "Table <name>, columns = [c1,c2]" line per table, or
            "[]" when nothing was found.
        """
        inspector = sa.inspect(self.db_connection)
        schemas = inspector.get_schema_names()
        if db_name and db_name in schemas:
            logger.info(f"database name specified and found, inspecting only '{db_name}'")
        else:
            logger.info(f"No database specified or not found in schemas {schemas}. Inspecting everything.")

        lines = []
        for schema in self._select_schemas(schemas, db_name):
            tables = inspector.get_table_names(schema=schema)
            logger.debug(f"Tables:\n{tables}")
            for table_name in tables:
                column_names = ",".join(
                    column['name'] for column in inspector.get_columns(table_name, schema=schema)
                )
                lines.append(f"Table {table_name}, columns = [{column_names}]\n")
        output = "".join(lines)
        return output if len(output) > 2 else "[]"

    def find_primary_keys(self, db_name=None):
        """Find the primary keys of a given database.

        :param db_name: The name of the database (schema). When empty or not
            found, every schema except ``information_schema`` is inspected.
        :return: A string such as "[a.id,b.id]", or "[]" when no primary
            keys are reported.
        """
        inspector = sa.inspect(self.db_connection)
        schemas = inspector.get_schema_names()
        if db_name and db_name in schemas:
            logger.info(f"database name specified and found, inspecting PKs only in '{db_name}'")

        keys = []
        for schema in self._select_schemas(schemas, db_name):
            for table_name in inspector.get_table_names(schema=schema):
                logger.info(f"getting PKs for table {table_name}")
                # get_pk_constraint returns a single dict; iterating over it
                # would only yield its key names, so read the column list.
                pk = inspector.get_pk_constraint(table_name, schema=schema) or {}
                for column_name in pk.get('constrained_columns') or []:
                    keys.append(f"{table_name}.{column_name}")
        return "[" + ",".join(keys) + "]"

    def debugger(self, test_sample_text, database, sql, sql_tag_start='```sql', sql_tag_end='```', sql_dialect='MySQL'):
        """Generate a prompt for cleaning the given SQL statement using the given sql_dialect.

        :param test_sample_text: The test sample text.
        :param database: The name of the database.
        :param sql: The SQL statement.
        :param sql_tag_start: The start tag for the SQL statement.
        :param sql_tag_end: The end tag for the SQL statement.
        :param sql_dialect: The SQL dialect.

        :return: The prompt.
        """
        fields = self.find_fields(db_name=database)
        fields += "Foreign_keys = " + self.find_foreign_keys(database) + "\n"
        fields += "Primary_keys = " + self.find_primary_keys(database)

        return self.clean_query_prompt.render(
            revised_qry_start=sql_tag_start,
            revised_qry_end=sql_tag_end,
            sql_dialect=sql_dialect,
            meta_data=fields,
            question=test_sample_text,
            sql_query=sql,
            **self._tag_kwargs()
        )

    # ------------------------------------------------------------------
    # model invocation
    # ------------------------------------------------------------------
    def _invoke_model(self, prompt, stop_sequences, word_in_mouth, text_generation_config):
        """Invoke the Bedrock model once and return its text, or None on a client error.

        Anthropic models are called with the messages request body; any other
        model id is sent the ``inputText``/``textGenerationConfig`` body.
        """
        results = None
        try:
            if self.model_id.startswith('anthropic'):
                messages = [{"role": "user", "content": prompt}]
                if word_in_mouth:
                    messages.append({
                        "role": "assistant",
                        "content": word_in_mouth,
                    })
                response = self.bedrock_runtime_boto3_client.invoke_model(
                    modelId=self.model_id,
                    body=json.dumps({
                        "anthropic_version": "bedrock-2023-05-31",
                        "messages": messages,
                        "temperature": 0,
                        "max_tokens": self.max_tokens_to_sample,
                        # Blank stop sequences carry no information; drop them.
                        "stop_sequences": [s for s in stop_sequences if s and s.strip()],
                    })
                )
                response_dict = json.loads(response.get('body').read().decode("utf-8"))
                results = response_dict["content"][0]["text"]
                # Accumulate usage when the response reports it.
                usage = response_dict.get("usage") or {}
                self.token_summary["input_tokens"] += usage.get("input_tokens", 0)
                self.token_summary["output_tokens"] += usage.get("output_tokens", 0)
            else:
                response = self.bedrock_runtime_boto3_client.invoke_model(
                    modelId=self.model_id,
                    body=json.dumps({
                        "inputText": prompt,
                        "textGenerationConfig": text_generation_config,
                    })
                )
                # need to add to token count for other models
                results = json.loads(response['body'].read())['results'][0]['outputText']
            logger.info(f"Successfully invoked model {self.model_id}")
        except botocore.exceptions.ClientError as e:
            logger.error(f"Error in invoking model {self.model_id}: {e}")
        return results

    def llm_generation(self, prompt, stop_sequences=None, word_in_mouth=None):
        """Invoke the model with the given prompt.

        :param prompt: prompt for model
        :param stop_sequences: list of stop sequence strings for model to use
        :param word_in_mouth: start the assistant's response

        :return: model output text, or None when the Bedrock call fails
        """
        return self._invoke_model(
            prompt,
            stop_sequences=[] if stop_sequences is None else stop_sequences,
            word_in_mouth=word_in_mouth,
            text_generation_config={
                "temperature": 0,
                "topP": 1
            },
        )

    def debugger_generation(self, prompt):
        """Ask the model to clean a SQL statement for the requested syntax.

        :param prompt: prompt with SQL statement
        :return: model output text, or None when the Bedrock call fails
        """
        return self._invoke_model(
            prompt,
            stop_sequences=[self.example_tag_end],
            word_in_mouth=None,
            text_generation_config={
                "maxTokenCount": 350,
                "temperature": 0,
            },
        )

    def revise_query_with_error(self, sql_query, error_message, sql_tag_start='```sql', sql_tag_end='```'):
        """Ask the model to rewrite a SQL query that failed with an error.

        :param sql_query: The SQL query to revise.
        :param error_message: The error message to revise the query with.
        :param sql_tag_start: Tag the model must put before the new query.
        :param sql_tag_end: Tag the model must put after the new query.
        :return: The revised SQL query.
        :raises ValueError: when the model returns nothing or its answer does
            not contain ``sql_tag_start``.
        """
        retry_sql = self.llm_generation(
            "Human:\n"
            "Please provide a new SQL query that correctly fixes the invalid SQL statement below using the SQL Error information.\n"
            f"Only provide one new SQL query in your response and use begin and end tags of \"{sql_tag_start}\" and \"{sql_tag_end}\" respectively:\n"
            f"Invalid SQL Statement: {sql_query}\n"
            f"SQL Error: {error_message}\n"
            "\n"
            "Assistant:\n"
        )
        logger.info(retry_sql)

        if not retry_sql or sql_tag_start not in retry_sql:
            raise ValueError("model response did not contain a tagged SQL query")
        return retry_sql.split(sql_tag_start)[1].split(sql_tag_end)[0]

    # ------------------------------------------------------------------
    # SQL generation
    # ------------------------------------------------------------------
    def get_sql(self, question, db_name, schema_links, classification):
        """Generate SQL for the given question.

        :param question: The question to generate SQL for.
        :param db_name: The name of the database.
        :param schema_links: The schema links.
        :param classification: The classification of the question
            ('EASY', 'NON-NESTED' or 'NESTED').

        :return: The generated SQL after the debugging pass, or the
            pre-debugging SQL when the debugging pass yields nothing usable.
        """
        logger.info(f"question is classifed as {classification}")
        logger.info(f"question asked: {question}")
        logger.info(f"schema_links: {schema_links}")
        logger.info(f"database name: {db_name}")
        sql_tag_start = '```sql'
        sql_tag_end = '```'
        stop_sequences = [self.example_tag_end]

        def generate_medium():
            return self.llm_generation(
                prompt=self.medium_prompt_maker(
                    test_sample_text=question,
                    database=db_name,
                    schema_links=schema_links,
                    sql_tag_start=sql_tag_start,
                    sql_tag_end=sql_tag_end),
                stop_sequences=stop_sequences,
                word_in_mouth=f"SQL: {sql_tag_start}"
            )

        SQL = None
        try:
            if classification == 'EASY':
                # word_in_mouth belongs to llm_generation; passing it to
                # easy_prompt_maker raised TypeError and lost every EASY query.
                SQL = self.llm_generation(
                    prompt=self.easy_prompt_maker(
                        test_sample_text=question,
                        database=db_name,
                        schema_links=schema_links,
                        sql_tag_start=sql_tag_start,
                        sql_tag_end=sql_tag_end),
                    stop_sequences=stop_sequences,
                    word_in_mouth=f"SQL: {sql_tag_start}"
                )
            elif classification == 'NON-NESTED':
                SQL = generate_medium()
            elif classification == 'NESTED':
                # NOTE(review): classification equals 'NESTED' here, so the
                # 'questions = [' lookup can never succeed and the hard prompt
                # is currently unreachable; confirm what the caller passes.
                if classification.find('questions = [') != -1:
                    sub_questions = classification.split('questions = ["')[1].split('"]')[0]
                    SQL = self.llm_generation(
                        prompt=self.hard_prompt_maker(
                            test_sample_text=question,
                            database=db_name,
                            schema_links=schema_links,
                            sql_tag_start=sql_tag_start,
                            sql_tag_end=sql_tag_end,
                            sub_questions=sub_questions),
                        stop_sequences=stop_sequences,
                        # No trailing whitespace after the final quote.
                        word_in_mouth=(
                            f"A: Let's think step by step. \"{question}\" can be solved by knowing "
                            f"the answer to the following sub-question \"{sub_questions}\". "
                            "The SQL query for the sub-question \""
                        )
                    )
                else:
                    logger.info("Question was classified as NESTED but no sub_questions were found. Assuming NON-NESTED instead")
                    SQL = generate_medium()
            else:
                logger.error(f"Unknown classification: {classification}")
        except Exception as e:
            # Deliberate best effort: fall back to a stub the debugger can fix.
            logger.error(f"Error in generating SQL: {e}")
            SQL = "SELECT"

        if SQL is None:
            logger.error("SQL slicing error: no SQL was generated")
            SQL = "SELECT"
        else:
            SQL = SQL.split(sql_tag_start)[-1].split(sql_tag_end)[0]

        logger.info(f"SQL before debugging: \n{SQL}")
        debugged_SQL = self.debugger_generation(
            prompt=self.debugger(question, db_name, SQL, sql_dialect=self.sql_dialect)
        )
        if not debugged_SQL or sql_tag_start not in debugged_SQL:
            # The debugging pass failed or ignored the tag convention; the
            # undebugged statement is the best answer available.
            logger.warning("Debugging pass returned no tagged SQL; using the SQL from before debugging")
            return SQL.strip()
        debugged_SQL = debugged_SQL.replace("\n", " ")
        return debugged_SQL.split(sql_tag_start)[1].split(sql_tag_end)[0].strip()

    def find_tables(self, db_name):
        """Return the list of table names in schema ``db_name`` (empty if the schema is unknown)."""
        inspector = sa.inspect(self.db_connection)
        if db_name not in inspector.get_schema_names():
            return []
        return list(inspector.get_table_names(schema=db_name))

    def get_schema(self, db_name, input_table_name):
        """Return the column names of one table as a "|"-terminated string.

        :param db_name: The schema that holds the table.
        :param input_table_name: The table whose columns are listed.
        :return: e.g. "id|name|", or "" when the schema or table is not found.
        """
        inspector = sa.inspect(self.db_connection)
        if db_name not in inspector.get_schema_names():
            return ""
        if input_table_name not in inspector.get_table_names(schema=db_name):
            return ""
        return "".join(
            f"{column['name']}|"
            for column in inspector.get_columns(input_table_name, schema=db_name)
        )
/libs/din_sql/prompt_templates/classification_prompt.txt.jinja: -------------------------------------------------------------------------------- 1 | {{ instruction_tag_start }}You are a relational database expert who can take a natural question and write a SQL statement that will answer the question. 2 | For the given question, classify it as EASY, NON-NESTED, or NESTED based on nested queries and JOIN. 3 | if need nested queries: predict NESTED 4 | elif need JOIN and don't need nested queries: predict NON-NESTED 5 | elif don't need JOIN and don't need nested queries: predict EASY{{ instruction_tag_end }} 6 | 7 | {{fields}} 8 | Foreign_keys = {{foreign_keys}} 9 | 10 | {{ example_tag_start }} 11 | Q: "Find the buildings which have rooms with capacity more than 50." 12 | schema_links: [classroom.building,classroom.capacity,50] 13 | A: Let’s think step by step. The SQL query for the question "Find the buildings which have rooms with capacity more than 50." needs these tables = [classroom], so we don't need JOIN. 14 | Plus, it doesn't require nested queries with (INTERSECT, UNION, EXCEPT, IN, NOT IN), and we need the answer to the questions = [""]. 15 | So, we don't need JOIN and don't need nested queries, then the SQL query can be classified as "EASY". 16 | Label: {{classification_start}}EASY{{classification_end}}{{ example_tag_end }} 17 | 18 | {{ example_tag_start }} 19 | Q: "What are the names of all instructors who advise students in the math depart sorted by total credits of the student." 20 | schema_links: [advisor.i_id = instructor.id,advisor.s_id = student.id,instructor.name,student.dept_name,student.tot_cred,math] 21 | A: Let’s think step by step. The SQL query for the question "What are the names of all instructors who advise students in the math depart sorted by total credits of the student." needs these tables = [advisor,instructor,student], so we need JOIN. 
22 | Plus, it doesn't need nested queries with (INTERSECT, UNION, EXCEPT, IN, NOT IN), and we need the answer to the questions = [""]. 23 | So, we need JOIN and don't need nested queries, then the SQL query can be classified as "NON-NESTED". 24 | Label: {{classification_start}}NON-NESTED{{classification_end}}{{ example_tag_end }} 25 | 26 | {{ example_tag_start }} 27 | Q: "Find the room number of the rooms which can sit 50 to 100 students and their buildings." 28 | schema_links: [classroom.building,classroom.room_number,classroom.capacity,50,100] 29 | A: Let’s think step by step. The SQL query for the question "Find the room number of the rooms which can sit 50 to 100 students and their buildings." needs these tables = [classroom], so we don't need JOIN. 30 | Plus, it doesn't require nested queries with (INTERSECT, UNION, EXCEPT, IN, NOT IN), and we need the answer to the questions = [""]. 31 | So, we don't need JOIN and don't need nested queries, then the SQL query can be classified as "EASY". 32 | Label: {{classification_start}}EASY{{classification_end}}{{ example_tag_end }} 33 | 34 | {{ example_tag_start }} 35 | Q: "How many courses that do not have prerequisite?" 36 | schema_links: [course.*,course.course_id = prereq.course_id] 37 | A: Let’s think step by step. The SQL query for the question "How many courses that do not have prerequisite?" needs these tables = [course,prereq], so we need JOIN. 38 | Plus, it requires nested queries with (INTERSECT, UNION, EXCEPT, IN, NOT IN), and we need the answer to the questions = ["Which courses have prerequisite?"]. 39 | So, we need JOIN and need nested queries, then the SQL query can be classified as "NESTED". 40 | Label: {{classification_start}}NESTED{{classification_end}}{{ example_tag_end }} 41 | 42 | {{ example_tag_start }} 43 | Q: "Find the title of course that is provided by both Statistics and Psychology departments." 
44 | schema_links: [course.title,course.dept_name,Statistics,Psychology] 45 | A: Let’s think step by step. The SQL query for the question "Find the title of course that is provided by both Statistics and Psychology departments." needs these tables = [course], so we don't need JOIN. 46 | Plus, it requires nested queries with (INTERSECT, UNION, EXCEPT, IN, NOT IN), and we need the answer to the questions = ["Find the titles of courses that is provided by Psychology departments"]. 47 | So, we don't need JOIN and need nested queries, then the SQL query can be classified as "NESTED". 48 | Label: {{classification_start}}NESTED{{classification_end}}{{ example_tag_end }} 49 | 50 | {{ example_tag_start }} 51 | Q: "Find the id of instructors who taught a class in Fall 2009 but not in Spring 2010." 52 | schema_links: [teaches.id,teaches.semester,teaches.year,Fall,2009,Spring,2010] 53 | A: Let’s think step by step. The SQL query for the question "Find the id of instructors who taught a class in Fall 2009 but not in Spring 2010." needs these tables = [teaches], so we don't need JOIN. 54 | Plus, it requires nested queries with (INTERSECT, UNION, EXCEPT, IN, NOT IN), and we need the answer to the questions = ["Find the id of instructors who taught a class in Spring 2010"]. 55 | So, we don't need JOIN and need nested queries, then the SQL query can be classified as "NESTED". 56 | Label: {{classification_start}}NESTED{{classification_end}}{{ example_tag_end }} 57 | 58 | {{ example_tag_start }} 59 | Q: "Find the name of the department that offers the highest total credits?" 60 | schema_links: [course.dept_name,course.credits] 61 | A: Let’s think step by step. The SQL query for the question "Find the name of the department that offers the highest total credits?." needs these tables = [course], so we don't need JOIN. 62 | Plus, it doesn't require nested queries with (INTERSECT, UNION, EXCEPT, IN, NOT IN), and we need the answer to the questions = [""]. 
63 | So, we don't need JOIN and don't need nested queries, then the SQL query can be classified as "EASY". 64 | Label: {{classification_start}}EASY{{classification_end}}{{ example_tag_end }} 65 | 66 | {{ example_tag_start }} 67 | Q: "What is the name of the instructor who advises the student with the greatest number of total credits?" 68 | schema_links: [advisor.i_id = instructor.id,advisor.s_id = student.id,instructor.name,student.tot_cred ] 69 | A: Let’s think step by step. The SQL query for the question "What is the name of the instructor who advises the student with the greatest number of total credits?" needs these tables = [advisor,instructor,student], so we need JOIN. 70 | Plus, it doesn't need nested queries with (INTERSECT, UNION, EXCEPT, IN, NOT IN), and we need the answer to the questions = [""]. 71 | So, we need JOIN and don't need nested queries, then the SQL query can be classified as "NON-NESTED". 72 | Label: {{classification_start}}NON-NESTED{{classification_end}}{{ example_tag_end }} 73 | 74 | {{ example_tag_start }} 75 | Q: "Find the total number of students and total number of instructors for each department." 76 | schema_links = [department.dept_name = instructor.dept_name,student.id,student.dept_name = department.dept_name,instructor.id] 77 | A: Let’s think step by step. The SQL query for the question "Find the total number of students and total number of instructors for each department." needs these tables = [department,instructor,student], so we need JOIN. 78 | Plus, it doesn't need nested queries with (INTERSECT, UNION, EXCEPT, IN, NOT IN), and we need the answer to the questions = [""]. 79 | So, we need JOIN and don't need nested queries, then the SQL query can be classified as "NON-NESTED". 80 | Label: {{classification_start}}NON-NESTED{{classification_end}}{{ example_tag_end }} 81 | 82 | {{ example_tag_start }} 83 | Q: "Give the name and building of the departments with greater than average budget." 
84 | schema_links: [department.budget,department.dept_name,department.building] 85 | A: Let’s think step by step. The SQL query for the question "Give the name and building of the departments with greater than average budget." needs these tables = [department], so we don't need JOIN. 86 | Plus, it requires nested queries with (INTERSECT, UNION, EXCEPT, IN, NOT IN), and we need the answer to the questions = ["What is the average budget of the departments"]. 87 | So, we don't need JOIN and need nested queries, then the SQL query can be classified as "NESTED". 88 | Label: {{classification_start}}NESTED{{classification_end}}{{ example_tag_end }} 89 | 90 | {{ example_tag_start }} 91 | Q: {{ test_sample_text }} 92 | schema_links: {{ schema_links }} -------------------------------------------------------------------------------- /libs/din_sql/prompt_templates/clean_query_prompt.txt.jinja: -------------------------------------------------------------------------------- 1 | {{ instruction_tag_start }}For the given Question and SQL Query, use the provided tables, columns, foreign keys, and primary keys to convert to a syntactically correct {{sql_dialect}} query. 2 | If there are any problems, fix them. If there are no issues, return the SQL Query as is. 3 | 4 | Use the following rules for fixing the SQL Query in addition to anything else you know about valid {{sql_dialect}} syntax: 5 | 1) Always honor casing of table and column names according to provided list of tables, columns, foreign keys. 6 | 2) Use the database values that are explicitly mentioned in the question. 7 | 3) Pay attention to the columns that are used for the JOIN by using the Foreign_keys. 8 | 4) Use DESC and DISTINCT when needed. 9 | 5) Pay attention to the columns that are used for the GROUP BY statement. 10 | 6) Pay attention to the columns that are used for the SELECT statement. 11 | 7) Only change the GROUP BY clause when necessary (Avoid redundant columns in GROUP BY). 
12 | 8) Use GROUP BY on one column only. 13 | 9) Ensure the number of columns and column names match exactly when using UNION and UNION ALL statements{{ instruction_tag_end }} 14 | 15 | {{ example_tag_start }} 16 | Table advisor, columns = [*,s_ID,i_ID] 17 | Table classroom, columns = [*,building,room_number,capacity] 18 | Table course, columns = [*,course_id,title,dept_name,credits] 19 | Table department, columns = [*,dept_name,building,budget] 20 | Table instructor, columns = [*,ID,name,dept_name,salary] 21 | Table prereq, columns = [*,course_id,prereq_id] 22 | Table section, columns = [*,course_id,sec_id,semester,year,building,room_number,time_slot_id] 23 | Table student, columns = [*,ID,name,dept_name,tot_cred] 24 | Table takes, columns = [*,ID,course_id,sec_id,semester,year,grade] 25 | Table teaches, columns = [*,ID,course_id,sec_id,semester,year] 26 | Table time_slot, columns = [*,time_slot_id,day,start_hr,start_min,end_hr,end_min] 27 | Foreign_keys = [course.dept_name = department.dept_name,instructor.dept_name = department.dept_name,section.building = classroom.building,section.room_number = classroom.room_number,section.course_id = course.course_id,teaches.ID = instructor.ID,teaches.course_id = section.course_id,teaches.sec_id = section.sec_id,teaches.semester = section.semester,teaches.year = section.year,student.dept_name = department.dept_name,takes.ID = student.ID,takes.course_id = section.course_id,takes.sec_id = section.sec_id,takes.semester = section.semester,takes.year = section.year,advisor.s_ID = student.ID,advisor.i_ID = instructor.ID,prereq.prereq_id = course.course_id,prereq.course_id = course.course_id] 28 | Primary_keys = [] 29 | 30 | Question: "Find the buildings which have rooms with capacity more than 50." 
31 | SQL Query: SELECT DISTINCT building FROM classroom WHERE capacity > 50 32 | Valid {{sql_dialect}} SQL Query (revised):{{revised_qry_start}}SELECT DISTINCT "building" FROM "classroom" WHERE "capacity" > 50{{revised_qry_end}} 33 | {{ example_tag_end }} 34 | 35 | {{meta_data}} 36 | 37 | Question: {{question}} 38 | SQL Query: {{sql_query}} 39 | Valid {{sql_dialect}} SQL Query (revised):{{revised_qry_start}} -------------------------------------------------------------------------------- /libs/din_sql/prompt_templates/easy_prompt.txt.jinja: -------------------------------------------------------------------------------- 1 | {{ instruction_tag_start }}You are a relational database expert who can take a natural question and write a SQL statement that will answer the question. Use the schema links to generate the SQL queries for each of the questions. {{ instruction_tag_end }} 2 | 3 | {{fields}} 4 | 5 | {{ example_tag_start }} 6 | Q: "Find the buildings which have rooms with capacity more than 50." 7 | Schema_links: [classroom.building,classroom.capacity,50] 8 | SQL: {{sql_tag_start}}SELECT DISTINCT building FROM classroom WHERE capacity > 50{{sql_tag_end}}{{ example_tag_end }} 9 | 10 | {{ example_tag_start }} 11 | Q: "Find the room number of the rooms which can sit 50 to 100 students and their buildings." 12 | Schema_links: [classroom.building,classroom.room_number,classroom.capacity,50,100] 13 | SQL: {{sql_tag_start}}SELECT building , room_number FROM classroom WHERE capacity BETWEEN 50 AND 100{{sql_tag_end}}{{ example_tag_end }} 14 | 15 | {{ example_tag_start }} 16 | Q: "Give the name of the student in the History department with the most credits." 
17 | Schema_links: [student.name,student.dept_name,student.tot_cred,History] 18 | SQL: {{sql_tag_start}}SELECT name FROM student WHERE dept_name = 'History' ORDER BY tot_cred DESC LIMIT 1{{sql_tag_end}}{{ example_tag_end }} 19 | 20 | {{ example_tag_start }} 21 | Q: "Find the total budgets of the Marketing or Finance department." 22 | Schema_links: [department.budget,department.dept_name,Marketing,Finance] 23 | SQL: {{sql_tag_start}}SELECT sum(budget) FROM department WHERE dept_name = 'Marketing' OR dept_name = 'Finance'{{sql_tag_end}}{{ example_tag_end }} 24 | 25 | {{ example_tag_start }} 26 | Q: "Find the department name of the instructor whose name contains 'Soisalon'." 27 | Schema_links: [instructor.dept_name,instructor.name,Soisalon] 28 | SQL: {{sql_tag_start}}SELECT dept_name FROM instructor WHERE name LIKE '%Soisalon%'{{sql_tag_end}}{{ example_tag_end }} 29 | 30 | {{ example_tag_start }} 31 | Q: "What is the name of the department with the most credits?" 32 | Schema_links: [course.dept_name,course.credits] 33 | SQL: {{sql_tag_start}}SELECT dept_name FROM course GROUP BY dept_name ORDER BY sum(credits) DESC LIMIT 1{{sql_tag_end}}{{ example_tag_end }} 34 | 35 | {{ example_tag_start }} 36 | Q: "How many instructors teach a course in the Spring of 2010?" 37 | Schema_links: [teaches.ID,teaches.semester,teaches.YEAR,Spring,2010] 38 | SQL: {{sql_tag_start}}SELECT COUNT (DISTINCT ID) FROM teaches WHERE semester = 'Spring' AND YEAR = 2010{{sql_tag_end}}{{ example_tag_end }} 39 | 40 | {{ example_tag_start }} 41 | Q: "Find the name of the students and their department names sorted by their total credits in ascending order." 42 | Schema_links: [student.name,student.dept_name,student.tot_cred] 43 | SQL: {{sql_tag_start}}SELECT name , dept_name FROM student ORDER BY tot_cred{{sql_tag_end}}{{ example_tag_end }} 44 | 45 | {{ example_tag_start }} 46 | Q: "Find the year which offers the largest number of courses." 
47 | Schema_links: [SECTION.YEAR,SECTION.*] 48 | SQL: {{sql_tag_start}}SELECT YEAR FROM SECTION GROUP BY YEAR ORDER BY count(*) DESC LIMIT 1{{sql_tag_end}}{{ example_tag_end }} 49 | 50 | {{ example_tag_start }} 51 | Q: "What are the names and average salaries for departments with average salary higher than 42000?" 52 | Schema_links: [instructor.dept_name,instructor.salary,42000] 53 | SQL: {{sql_tag_start}}SELECT dept_name , AVG (salary) FROM instructor GROUP BY dept_name HAVING AVG (salary) > 42000{{sql_tag_end}}{{ example_tag_end }} 54 | 55 | {{ example_tag_start }} 56 | Q: "How many rooms in each building have a capacity of over 50?" 57 | Schema_links: [classroom.*,classroom.building,classroom.capacity,50] 58 | SQL: {{sql_tag_start}}SELECT count(*) , building FROM classroom WHERE capacity > 50 GROUP BY building{{sql_tag_end}}{{ example_tag_end }} 59 | 60 | {{ example_tag_start }} 61 | Q: "Find the names of the top 3 departments that provide the largest amount of courses?" 62 | Schema_links: [course.dept_name,course.*] 63 | SQL: {{sql_tag_start}}SELECT dept_name FROM course GROUP BY dept_name ORDER BY count(*) DESC LIMIT 3{{sql_tag_end}}{{ example_tag_end }} 64 | 65 | {{ example_tag_start }} 66 | Q: "Find the maximum and average capacity among rooms in each building." 67 | Schema_links: [classroom.building,classroom.capacity] 68 | SQL: {{sql_tag_start}}SELECT max(capacity) , avg(capacity) , building FROM classroom GROUP BY building{{sql_tag_end}}{{ example_tag_end }} 69 | 70 | {{ example_tag_start }} 71 | Q: "Find the title of the course that is offered by more than one department." 
72 | Schema_links: [course.title] 73 | SQL: {{sql_tag_start}}SELECT title FROM course GROUP BY title HAVING count(*) > 1{{sql_tag_end}}{{ example_tag_end }} 74 | 75 | {{ example_tag_start }} 76 | Q: "{{test_sample_text}}" 77 | Schema_links: {{schema_links}} -------------------------------------------------------------------------------- /libs/din_sql/prompt_templates/hard_prompt.txt.jinja: -------------------------------------------------------------------------------- 1 | {{ instruction_tag_start }}You are a relational database expert who can take a natural question and write a SQL statement that will answer the question. Use the intermediate representation and the schema links to generate the SQL queries for each of the questions.{{ instruction_tag_end }} 2 | 3 | {{fields}} 4 | Foreign_keys = {{foreign_keys}} 5 | 6 | {{ example_tag_start }} 7 | Q: "Find the title of courses that have two prerequisites?" 8 | Schema_links: [course.title,course.course_id = prereq.course_id] 9 | A: Let's think step by step. "Find the title of courses that have two prerequisites?" can be solved by knowing the answer to the following sub-question "What are the titles for courses with two prerequisites?". 10 | The SQL query for the sub-question "What are the titles for courses with two prerequisites?" is SELECT T1.title FROM course AS T1 JOIN prereq AS T2 ON T1.course_id = T2.course_id GROUP BY T2.course_id HAVING count(*) = 2 11 | So, the answer to the question "Find the title of courses that have two prerequisites?" is = 12 | Intermediate_representation: select course.title from course where count ( prereq.* ) = 2 group by prereq.course_id 13 | SQL: {{sql_tag_start}}SELECT T1.title FROM course AS T1 JOIN prereq AS T2 ON T1.course_id = T2.course_id GROUP BY T2.course_id HAVING count(*) = 2{{sql_tag_end}}{{ example_tag_end }} 14 | 15 | {{ example_tag_start }} 16 | Q: "Find the name and building of the department with the highest budget." 
17 | Schema_links: [department.dept_name,department.building,department.budget] 18 | A: Let's think step by step. "Find the name and building of the department with the highest budget." can be solved by knowing the answer to the following sub-question "What is the department name and corresponding building for the department with the greatest budget?". 19 | The SQL query for the sub-question "What is the department name and corresponding building for the department with the greatest budget?" is SELECT dept_name , building FROM department ORDER BY budget DESC LIMIT 1 20 | So, the answer to the question "Find the name and building of the department with the highest budget." is = 21 | Intermediate_representation: select department.dept_name , department.building from department order by department.budget desc limit 1 22 | SQL: {{sql_tag_start}}SELECT dept_name , building FROM department ORDER BY budget DESC LIMIT 1{{sql_tag_end}}{{ example_tag_end }} 23 | 24 | {{ example_tag_start }} 25 | Q: "Find the title, credit, and department name of courses that have more than one prerequisites?" 26 | Schema_links: [course.title,course.credits,course.dept_name,course.course_id = prereq.course_id] 27 | A: Let's think step by step. "Find the title, credit, and department name of courses that have more than one prerequisites?" can be solved by knowing the answer to the following sub-question "What is the title, credit value, and department name for courses with more than one prerequisite?". 28 | The SQL query for the sub-question "What is the title, credit value, and department name for courses with more than one prerequisite?" is SELECT T1.title , T1.credits , T1.dept_name FROM course AS T1 JOIN prereq AS T2 ON T1.course_id = T2.course_id GROUP BY T2.course_id HAVING count(*) > 1 29 | So, the answer to the question "Find the title, credit, and department name of courses that have more than one prerequisites?" 
is = 30 | Intermediate_representation: select course.title , course.credits , course.dept_name from course where count ( prereq.* ) > 1 group by prereq.course_id 31 | SQL: {{sql_tag_start}}SELECT T1.title , T1.credits , T1.dept_name FROM course AS T1 JOIN prereq AS T2 ON T1.course_id = T2.course_id GROUP BY T2.course_id HAVING count(*) > 1{{sql_tag_end}}{{ example_tag_end }} 32 | 33 | {{ example_tag_start }} 34 | Q: "Give the name and building of the departments with greater than average budget." 35 | Schema_links: [department.dept_name,department.building,department.budget] 36 | A: Let's think step by step. "Give the name and building of the departments with greater than average budget." can be solved by knowing the answer to the following sub-question "What is the average budget of departments?". 37 | The SQL query for the sub-question "What is the average budget of departments?" is SELECT avg(budget) FROM department 38 | So, the answer to the question "Give the name and building of the departments with greater than average budget." is = 39 | Intermediate_representation: select department.dept_name , department.building from department where @.@ > avg ( department.budget ) 40 | SQL: {{sql_tag_start}}SELECT dept_name , building FROM department WHERE budget > (SELECT avg(budget) FROM department){{sql_tag_end}}{{ example_tag_end }} 41 | 42 | {{ example_tag_start }} 43 | Q: "Find the id of instructors who taught a class in Fall 2009 but not in Spring 2010." 44 | Schema_links: [teaches.id,teaches.semester,teaches.YEAR,Fall,2009,Spring,2010] 45 | A: Let's think step by step. "Find the id of instructors who taught a class in Fall 2009 but not in Spring 2010." can be solved by knowing the answer to the following sub-question "Find the id of instructors who taught a class in Spring 2010". 
46 | The SQL query for the sub-question "Find the id of instructors who taught a class in Spring 2010" is SELECT id FROM teaches WHERE semester = 'Spring' AND YEAR = 2010 47 | So, the answer to the question "Find the id of instructors who taught a class in Fall 2009 but not in Spring 2010." is = 48 | Intermediate_representation: select teaches.ID from teaches where teaches.semester = "Fall" and teaches.year = 2009 and teaches.semester != "Spring" and teaches.year = 2010 49 | SQL: {{sql_tag_start}}SELECT id FROM teaches WHERE semester = 'Fall' AND YEAR = 2009 EXCEPT SELECT id FROM teaches WHERE semester = 'Spring' AND YEAR = 2010{{sql_tag_end}}{{ example_tag_end }} 50 | 51 | {{ example_tag_start }} 52 | Q: "Find the name of the courses that do not have any prerequisite?" 53 | Schema_links: [course.title,course.course_id] 54 | A: Let's think step by step. "Find the name of the courses that do not have any prerequisite?" can be solved by knowing the answer to the following sub-question "What are the courses that have any prerequisite?". 55 | The SQL query for the sub-question "What are the courses that have any prerequisite?" is SELECT course_id FROM prereq 56 | So, the answer to the question "Find the name of the courses that do not have any prerequisite?" is = 57 | Intermediate_representation: select course.title from course where @.@ not in prereq.course_id 58 | SQL: {{sql_tag_start}}SELECT title FROM course WHERE course_id NOT IN (SELECT course_id FROM prereq){{sql_tag_end}}{{ example_tag_end }} 59 | 60 | {{ example_tag_start }} 61 | Q: "Find the salaries of all distinct instructors that are less than the largest salary." 62 | Schema_links: [instructor.salary] 63 | A: Let's think step by step. "Find the salaries of all distinct instructors that are less than the largest salary." can be solved by knowing the answer to the following sub-question "What is the largest salary of instructors". 
64 | The SQL query for the sub-question "What is the largest salary of instructors" is SELECT max(salary) FROM instructor 65 | So, the answer to the question "Find the salaries of all distinct instructors that are less than the largest salary." is = 66 | Intermediate_representation: select distinct instructor.salary from instructor where @.@ < max ( instructor.salary ) 67 | SQL: {{sql_tag_start}}SELECT DISTINCT salary FROM instructor WHERE salary < (SELECT max(salary) FROM instructor){{sql_tag_end}}{{ example_tag_end }} 68 | 69 | {{ example_tag_start }} 70 | Q: "Find the names of students who have taken any course in the fall semester of year 2003." 71 | Schema_links: [student.id,student.name,takes.id,takes.semester,fall,2003] 72 | A: Let's think step by step. "Find the names of students who have taken any course in the fall semester of year 2003." can be solved by knowing the answer to the following sub-question "Find the students who have taken any course in the fall semester of year 2003.". 73 | The SQL query for the sub-question "Find the students who have taken any course in the fall semester of year 2003." is SELECT id FROM takes WHERE semester = 'Fall' AND YEAR = 2003 74 | So, the answer to the question "Find the names of students who have taken any course in the fall semester of year 2003." is = 75 | Intermediate_representation: select student.name from student where takes.semester = "Fall" and takes.year = 2003 76 | SQL: {{sql_tag_start}}SELECT name FROM student WHERE id IN (SELECT id FROM takes WHERE semester = 'Fall' AND YEAR = 2003){{sql_tag_end}}{{ example_tag_end }} 77 | 78 | {{ example_tag_start }} 79 | Q: "Find the minimum salary for the departments whose average salary is above the average payment of all instructors." 80 | Schema_links: [instructor.salary,instructor.dept_name] 81 | A: Let's think step by step. "Find the minimum salary for the departments whose average salary is above the average payment of all instructors." 
can be solved by knowing the answer to the following sub-question "What is the average payment of all instructors.". 82 | The SQL query for the sub-question "What is the average payment of all instructors." is SELECT avg(salary) FROM instructor 83 | So, the answer to the question "Find the minimum salary for the departments whose average salary is above the average payment of all instructors." is = 84 | Intermediate_representation: select min(instructor.salary) , instructor.dept_name from instructor where avg ( instructor.salary ) > avg ( instructor.salary ) group by instructor.dept_name 85 | SQL: {{sql_tag_start}}SELECT min(salary) , dept_name FROM instructor GROUP BY dept_name HAVING avg(salary) > (SELECT avg(salary) FROM instructor){{sql_tag_end}}{{ example_tag_end }} 86 | 87 | {{ example_tag_start }} 88 | Q: "What is the course title of the prerequisite of course Mobile Computing?" 89 | Schema_links: [course.title,course.course_id = prereq.course_id,prereq.prereq_id,course.title,Mobile Computing] 90 | A: Let's think step by step. "What is the course title of the prerequisite of course Mobile Computing?" can be solved by knowing the answer to the following sub-question "What are the ids of the prerequisite of course Mobile Computing?". 91 | The SQL query for the sub-question "What are the ids of the prerequisite of course Mobile Computing?" is SELECT T1.prereq_id FROM prereq AS T1 JOIN course AS T2 ON T1.course_id = T2.course_id WHERE T2.title = 'Mobile Computing' 92 | So, the answer to the question "What is the course title of the prerequisite of course Mobile Computing?" 
is = 93 | Intermediate_representation: select course.title from course where @.@ in prereq.* and course.title = "Mobile Computing" 94 | SQL: {{sql_tag_start}}SELECT title FROM course WHERE course_id IN (SELECT T1.prereq_id FROM prereq AS T1 JOIN course AS T2 ON T1.course_id = T2.course_id WHERE T2.title = 'Mobile Computing'){{sql_tag_end}}{{ example_tag_end }} 95 | 96 | {{ example_tag_start }} 97 | Q: "Give the title and credits for the course that is taught in the classroom with the greatest capacity." 98 | Schema_links: [classroom.capacity,classroom.building = SECTION.building,classroom.room_number = SECTION.room_number,course.title,course.credits,course.course_id = SECTION.course_id] 99 | A: Let's think step by step. "Give the title and credits for the course that is taught in the classroom with the greatest capacity." can be solved by knowing the answer to the following sub-question "What is the capacity of the largest room?". 100 | The SQL query for the sub-question "What is the capacity of the largest room?" is (SELECT max(capacity) FROM classroom) 101 | So, the answer to the question "Give the title and credits for the course that is taught in the classroom with the greatest capacity." 
is = 102 | Intermediate_representation: select course.title , course.credits from classroom order by classroom.capacity desc limit 1 103 | SQL: {{sql_tag_start}}SELECT T3.title , T3.credits FROM classroom AS T1 JOIN SECTION AS T2 ON T1.building = T2.building AND T1.room_number = T2.room_number JOIN course AS T3 ON T2.course_id = T3.course_id WHERE T1.capacity = (SELECT max(capacity) FROM classroom){{sql_tag_end}}{{ example_tag_end }} 104 | 105 | {{ example_tag_start }} 106 | Q: "{{test_sample_text}}" 107 | Schema_links: {{schema_links}} -------------------------------------------------------------------------------- /libs/din_sql/prompt_templates/medium_prompt.txt.jinja: -------------------------------------------------------------------------------- 1 | {{ instruction_tag_start }}You are a relational database expert who can take a natural question and write a SQL statement that will answer the question. Use the schema links and Intermediate_representation to generate the SQL queries for each of the questions.{{ instruction_tag_end }} 2 | 3 | {{fields}} 4 | Foreign_keys = {{foreign_keys}} 5 | 6 | {{ example_tag_start }} 7 | Q: "Find the total budgets of the Marketing or Finance department." 8 | Schema_links: [department.budget,department.dept_name,Marketing,Finance] 9 | A: Let’s think step by step. For creating the SQL for the given question, we need to join these tables = []. First, create an intermediate representation, then use it to construct the SQL query. 10 | Intermediate_representation: select sum(department.budget) from department where department.dept_name = "Marketing" or department.dept_name = "Finance" 11 | SQL: {{sql_tag_start}}SELECT sum(budget) FROM department WHERE dept_name = 'Marketing' OR dept_name = 'Finance'{{sql_tag_end}}{{ example_tag_end }} 12 | 13 | {{ example_tag_start }} 14 | Q: "Find the name and building of the department with the highest budget." 
15 | Schema_links: [department.budget,department.dept_name,department.building] 16 | A: Let’s think step by step. For creating the SQL for the given question, we need to join these tables = []. First, create an intermediate representation, then use it to construct the SQL query. 17 | Intermediate_representation: select department.dept_name , department.building from department order by department.budget desc limit 1 18 | SQL: {{sql_tag_start}}SELECT dept_name , building FROM department ORDER BY budget DESC LIMIT 1{{sql_tag_end}}{{ example_tag_end }} 19 | 20 | {{ example_tag_start }} 21 | Q: "What is the name and building of the departments whose budget is more than the average budget?" 22 | Schema_links: [department.budget,department.dept_name,department.building] 23 | A: Let’s think step by step. For creating the SQL for the given question, we need to join these tables = []. First, create an intermediate representation, then use it to construct the SQL query. 24 | Intermediate_representation: select department.dept_name , department.building from department where @.@ > avg ( department.budget ) 25 | SQL: {{sql_tag_start}}SELECT dept_name , building FROM department WHERE budget > (SELECT avg(budget) FROM department){{sql_tag_end}}{{ example_tag_end }} 26 | 27 | {{ example_tag_start }} 28 | Q: "Find the total number of students and total number of instructors for each department." 29 | Schema_links: [department.dept_name = student.dept_name,student.id,department.dept_name = instructor.dept_name,instructor.id] 30 | A: Let’s think step by step. For creating the SQL for the given question, we need to join these tables = [department,student,instructor]. First, create an intermediate representation, then use it to construct the SQL query. 
31 | Intermediate_representation: select count( distinct student.ID) , count( distinct instructor.ID) , department.dept_name from department group by instructor.dept_name 32 | SQL: {{sql_tag_start}}SELECT count(DISTINCT T2.id) , count(DISTINCT T3.id) , T3.dept_name FROM department AS T1 JOIN student AS T2 ON T1.dept_name = T2.dept_name JOIN instructor AS T3 ON T1.dept_name = T3.dept_name GROUP BY T3.dept_name{{sql_tag_end}}{{ example_tag_end }} 33 | 34 | {{ example_tag_start }} 35 | Q: "Find the title of courses that have two prerequisites?" 36 | Schema_links: [course.title,course.course_id = prereq.course_id] 37 | A: Let’s think step by step. For creating the SQL for the given question, we need to join these tables = [course,prereq]. First, create an intermediate representation, then use it to construct the SQL query. 38 | Intermediate_representation: select course.title from course where count ( prereq.* ) = 2 group by prereq.course_id 39 | SQL: {{sql_tag_start}}SELECT T1.title FROM course AS T1 JOIN prereq AS T2 ON T1.course_id = T2.course_id GROUP BY T2.course_id HAVING count(*) = 2{{sql_tag_end}}{{ example_tag_end }} 40 | 41 | {{ example_tag_start }} 42 | Q: "Find the name of students who took any class in the years of 2009 and 2010." 43 | Schema_links: [student.name,student.id = takes.id,takes.YEAR,2009,2010] 44 | A: Let’s think step by step. For creating the SQL for the given question, we need to join these tables = [student,takes]. First, create an intermediate representation, then use it to construct the SQL query. 45 | Intermediate_representation: select distinct student.name from student where takes.year = 2009 or takes.year = 2010 46 | SQL: {{sql_tag_start}}SELECT DISTINCT T1.name FROM student AS T1 JOIN takes AS T2 ON T1.id = T2.id WHERE T2.YEAR = 2009 OR T2.YEAR = 2010{{sql_tag_end}}{{ example_tag_end }} 47 | 48 | {{ example_tag_start }} 49 | Q: "list in alphabetic order all course names and their instructors' names in year 2008." 
50 | Schema_links: [course.title,course.course_id = teaches.course_id,teaches.id = instructor.id,instructor.name,teaches.year,2008] 51 | A: Let’s think step by step. For creating the SQL for the given question, we need to join these tables = [course,teaches,instructor]. First, create an intermediate representation, then use it to construct the SQL query. 52 | Intermediate_representation: select course.title , instructor.name from course where teaches.year = 2008 order by course.title asc 53 | SQL: {{sql_tag_start}}SELECT T1.title , T3.name FROM course AS T1 JOIN teaches AS T2 ON T1.course_id = T2.course_id JOIN instructor AS T3 ON T2.id = T3.id WHERE T2.YEAR = 2008 ORDER BY T1.title{{sql_tag_end}}{{ example_tag_end }} 54 | 55 | {{ example_tag_start }} 56 | Q: "{{test_sample_text}}" 57 | Schema_links: {{schema_links}} -------------------------------------------------------------------------------- /libs/din_sql/prompt_templates/schema_linking_prompt.txt.jinja: -------------------------------------------------------------------------------- 1 | {{ instruction_tag_start }} Find the schema_links for generating SQL queries for each question based on the database schema and Foreign keys provided. 
{{ instruction_tag_end }} 2 | 3 | {{ example_tag_start }} 4 | Table advisor, columns = [*,s_ID,i_ID] 5 | Table classroom, columns = [*,building,room_number,capacity] 6 | Table course, columns = [*,course_id,title,dept_name,credits] 7 | Table department, columns = [*,dept_name,building,budget] 8 | Table instructor, columns = [*,ID,name,dept_name,salary] 9 | Table prereq, columns = [*,course_id,prereq_id] 10 | Table section, columns = [*,course_id,sec_id,semester,year,building,room_number,time_slot_id] 11 | Table student, columns = [*,ID,name,dept_name,tot_cred] 12 | Table takes, columns = [*,ID,course_id,sec_id,semester,year,grade] 13 | Table teaches, columns = [*,ID,course_id,sec_id,semester,year] 14 | Table time_slot, columns = [*,time_slot_id,day,start_hr,start_min,end_hr,end_min] 15 | Foreign_keys = [course.dept_name = department.dept_name,instructor.dept_name = department.dept_name,section.building = classroom.building,section.room_number = classroom.room_number,section.course_id = course.course_id,teaches.ID = instructor.ID,teaches.course_id = section.course_id,teaches.sec_id = section.sec_id,teaches.semester = section.semester,teaches.year = section.year,student.dept_name = department.dept_name,takes.ID = student.ID,takes.course_id = section.course_id,takes.sec_id = section.sec_id,takes.semester = section.semester,takes.year = section.year,advisor.s_ID = student.ID,advisor.i_ID = instructor.ID,prereq.prereq_id = course.course_id,prereq.course_id = course.course_id] 16 | Q: "Find the buildings which have rooms with capacity more than 50." 17 | A: Let’s think step by step. In the question "Find the buildings which have rooms with capacity more than 50.", we are asked: 18 | "the buildings which have rooms" so we need column = [classroom.building] 19 | "rooms with capacity" so we need column = [classroom.capacity] 20 | Based on the columns and tables, we need these Foreign_keys = []. 
21 | Based on the tables, columns, and Foreign_keys, The set of possible cell values are = [50]. So the Schema_links are: 22 | Schema_links: {{schema_links_start}}[classroom.building,classroom.capacity,50]{{schema_links_end}}{{ example_tag_end }} 23 | 24 | {{ example_tag_start }} 25 | Table department, columns = [*,Department_ID,Name,Creation,Ranking,Budget_in_Billions,Num_Employees] 26 | Table head, columns = [*,head_ID,name,born_state,age] 27 | Table management, columns = [*,department_ID,head_ID,temporary_acting] 28 | Foreign_keys = [management.head_ID = head.head_ID,management.department_ID = department.Department_ID] 29 | Q: "How many heads of the departments are older than 56 ?" 30 | A: Let’s think step by step. In the question "How many heads of the departments are older than 56 ?", we are asked: 31 | "How many heads of the departments" so we need column = [head.*] 32 | "older" so we need column = [head.age] 33 | Based on the columns and tables, we need these Foreign_keys = []. 34 | Based on the tables, columns, and Foreign_keys, The set of possible cell values are = [56]. So the Schema_links are: 35 | Schema_links: {{schema_links_start}}[head.*,head.age,56]{{schema_links_end}}{{ example_tag_end }} 36 | 37 | {{ example_tag_start }} 38 | Table department, columns = [*,Department_ID,Name,Creation,Ranking,Budget_in_Billions,Num_Employees] 39 | Table head, columns = [*,head_ID,name,born_state,age] 40 | Table management, columns = [*,department_ID,head_ID,temporary_acting] 41 | Foreign_keys = [management.head_ID = head.head_ID,management.department_ID = department.Department_ID] 42 | Q: "what are the distinct creation years of the departments managed by a secretary born in state 'Alabama'?" 43 | A: Let’s think step by step. 
In the question "what are the distinct creation years of the departments managed by a secretary born in state 'Alabama'?", we are asked: 44 | "distinct creation years of the departments" so we need column = [department.Creation] 45 | "departments managed by" so we need column = [management.department_ID] 46 | "born in" so we need column = [head.born_state] 47 | Based on the columns and tables, we need these Foreign_keys = [department.Department_ID = management.department_ID,management.head_ID = head.head_ID]. 48 | Based on the tables, columns, and Foreign_keys, The set of possible cell values are = ['Alabama']. So the Schema_links are: 49 | Schema_links: {{schema_links_start}}[department.Creation,department.Department_ID = management.department_ID,head.head_ID = management.head_ID,head.born_state,'Alabama']{{schema_links_end}}{{ example_tag_end }} 50 | 51 | {{ example_tag_start }} 52 | Table Addresses, columns = [*,address_id,line_1,line_2,city,zip_postcode,state_province_county,country] 53 | Table Candidate_Assessments, columns = [*,candidate_id,qualification,assessment_date,asessment_outcome_code] 54 | Table Candidates, columns = [*,candidate_id,candidate_details] 55 | Table Courses, columns = [*,course_id,course_name,course_description,other_details] 56 | Table People, columns = [*,person_id,first_name,middle_name,last_name,cell_mobile_number,email_address,login_name,password] 57 | Table People_Addresses, columns = [*,person_address_id,person_id,address_id,date_from,date_to] 58 | Table Student_Course_Attendance, columns = [*,student_id,course_id,date_of_attendance] 59 | Table Student_Course_Registrations, columns = [*,student_id,course_id,registration_date] 60 | Table Students, columns = [*,student_id,student_details] 61 | Foreign_keys = [Students.student_id = People.person_id,People_Addresses.address_id = Addresses.address_id,People_Addresses.person_id = People.person_id,Student_Course_Registrations.course_id = 
Courses.course_id,Student_Course_Registrations.student_id = Students.student_id,Student_Course_Attendance.student_id = Student_Course_Registrations.student_id,Student_Course_Attendance.course_id = Student_Course_Registrations.course_id,Candidates.candidate_id = People.person_id,Candidate_Assessments.candidate_id = Candidates.candidate_id] 62 | Q: "List the id of students who never attends courses?" 63 | A: Let’s think step by step. In the question "List the id of students who never attends courses?", we are asked: 64 | "id of students" so we need column = [Students.student_id] 65 | "never attends courses" so we need column = [Student_Course_Attendance.student_id] 66 | Based on the columns and tables, we need these Foreign_keys = [Students.student_id = Student_Course_Attendance.student_id]. 67 | Based on the tables, columns, and Foreign_keys, The set of possible cell values are = []. So the Schema_links are: 68 | Schema_links: {{schema_links_start}}[Students.student_id = Student_Course_Attendance.student_id]{{schema_links_end}}{{ example_tag_end }} 69 | 70 | {{ example_tag_start }} 71 | Table Country, columns = [*,id,name] 72 | Table League, columns = [*,id,country_id,name] 73 | Table Player, columns = [*,id,player_api_id,player_name,player_fifa_api_id,birthday,height,weight] 74 | Table Player_Attributes, columns = [*,id,player_fifa_api_id,player_api_id,date,overall_rating,potential,preferred_foot,attacking_work_rate,defensive_work_rate,crossing,finishing,heading_accuracy,short_passing,volleys,dribbling,curve,free_kick_accuracy,long_passing,ball_control,acceleration,sprint_speed,agility,reactions,balance,shot_power,jumping,stamina,strength,long_shots,aggression,interceptions,positioning,vision,penalties,marking,standing_tackle,sliding_tackle,gk_diving,gk_handling,gk_kicking,gk_positioning,gk_reflexes] 75 | Table Team, columns = [*,id,team_api_id,team_fifa_api_id,team_long_name,team_short_name] 76 | Table Team_Attributes, columns = 
[*,id,team_fifa_api_id,team_api_id,date,buildUpPlaySpeed,buildUpPlaySpeedClass,buildUpPlayDribbling,buildUpPlayDribblingClass,buildUpPlayPassing,buildUpPlayPassingClass,buildUpPlayPositioningClass,chanceCreationPassing,chanceCreationPassingClass,chanceCreationCrossing,chanceCreationCrossingClass,chanceCreationShooting,chanceCreationShootingClass,chanceCreationPositioningClass,defencePressure,defencePressureClass,defenceAggression,defenceAggressionClass,defenceTeamWidth,defenceTeamWidthClass,defenceDefenderLineClass] 77 | Table sqlite_sequence, columns = [*,name,seq] 78 | Foreign_keys = [Player_Attributes.player_api_id = Player.player_api_id,Player_Attributes.player_fifa_api_id = Player.player_fifa_api_id,League.country_id = Country.id,Team_Attributes.team_api_id = Team.team_api_id,Team_Attributes.team_fifa_api_id = Team.team_fifa_api_id] 79 | Q: "List the names of all left-footed players who have overall rating between 85 and 90." 80 | A: Let’s think step by step. In the question "List the names of all left-footed players who have overall rating between 85 and 90.", we are asked: 81 | "names of all left-footed players" so we need column = [Player.player_name,Player_Attributes.preferred_foot] 82 | "players who have overall rating" so we need column = [Player_Attributes.overall_rating] 83 | Based on the columns and tables, we need these Foreign_keys = [Player_Attributes.player_api_id = Player.player_api_id]. 84 | Based on the tables, columns, and Foreign_keys, The set of possible cell values are = [left,85,90]. 
So the Schema_links are: 85 | Schema_links: {{schema_links_start}}[Player.player_name,Player_Attributes.preferred_foot,Player_Attributes.overall_rating,Player_Attributes.player_api_id = Player.player_api_id,left,85,90]{{schema_links_end}}{{ example_tag_end }} 86 | 87 | {{ example_tag_start }} 88 | Table advisor, columns = [*,s_ID,i_ID] 89 | Table classroom, columns = [*,building,room_number,capacity] 90 | Table course, columns = [*,course_id,title,dept_name,credits] 91 | Table department, columns = [*,dept_name,building,budget] 92 | Table instructor, columns = [*,ID,name,dept_name,salary] 93 | Table prereq, columns = [*,course_id,prereq_id] 94 | Table section, columns = [*,course_id,sec_id,semester,year,building,room_number,time_slot_id] 95 | Table student, columns = [*,ID,name,dept_name,tot_cred] 96 | Table takes, columns = [*,ID,course_id,sec_id,semester,year,grade] 97 | Table teaches, columns = [*,ID,course_id,sec_id,semester,year] 98 | Table time_slot, columns = [*,time_slot_id,day,start_hr,start_min,end_hr,end_min] 99 | Foreign_keys = [course.dept_name = department.dept_name,instructor.dept_name = department.dept_name,section.building = classroom.building,section.room_number = classroom.room_number,section.course_id = course.course_id,teaches.ID = instructor.ID,teaches.course_id = section.course_id,teaches.sec_id = section.sec_id,teaches.semester = section.semester,teaches.year = section.year,student.dept_name = department.dept_name,takes.ID = student.ID,takes.course_id = section.course_id,takes.sec_id = section.sec_id,takes.semester = section.semester,takes.year = section.year,advisor.s_ID = student.ID,advisor.i_ID = instructor.ID,prereq.prereq_id = course.course_id,prereq.course_id = course.course_id] 100 | Q: "Give the title of the course offered in Chandler during the Fall of 2010." 101 | A: Let’s think step by step. 
In the question "Give the title of the course offered in Chandler during the Fall of 2010.", we are asked: 102 | "title of the course" so we need column = [course.title] 103 | "course offered in Chandler" so we need column = [SECTION.building] 104 | "during the Fall" so we need column = [SECTION.semester] 105 | "of 2010" so we need column = [SECTION.year] 106 | Based on the columns and tables, we need these Foreign_keys = [course.course_id = SECTION.course_id]. 107 | Based on the tables, columns, and Foreign_keys, The set of possible cell values are = [Chandler,Fall,2010]. So the Schema_links are: 108 | Schema_links: {{schema_links_start}}[course.title,course.course_id = SECTION.course_id,SECTION.building,SECTION.year,SECTION.semester,Chandler,Fall,2010]{{schema_links_end}}{{ example_tag_end }} 109 | 110 | {{ example_tag_start }} 111 | Table city, columns = [*,City_ID,Official_Name,Status,Area_km_2,Population,Census_Ranking] 112 | Table competition_record, columns = [*,Competition_ID,Farm_ID,Rank] 113 | Table farm, columns = [*,Farm_ID,Year,Total_Horses,Working_Horses,Total_Cattle,Oxen,Bulls,Cows,Pigs,Sheep_and_Goats] 114 | Table farm_competition, columns = [*,Competition_ID,Year,Theme,Host_city_ID,Hosts] 115 | Foreign_keys = [farm_competition.Host_city_ID = city.City_ID,competition_record.Farm_ID = farm.Farm_ID,competition_record.Competition_ID = farm_competition.Competition_ID] 116 | Q: "Show the status of the city that has hosted the greatest number of competitions." 117 | A: Let’s think step by step. In the question "Show the status of the city that has hosted the greatest number of competitions.", we are asked: 118 | "the status of the city" so we need column = [city.Status] 119 | "greatest number of competitions" so we need column = [farm_competition.*] 120 | Based on the columns and tables, we need these Foreign_keys = [farm_competition.Host_city_ID = city.City_ID]. 
121 | Based on the tables, columns, and Foreign_keys, The set of possible cell values are = []. So the Schema_links are: 122 | Schema_links: {{schema_links_start}}[city.Status,farm_competition.Host_city_ID = city.City_ID,farm_competition.*]{{schema_links_end}}{{ example_tag_end }} 123 | 124 | {{ example_tag_start }} 125 | Table advisor, columns = [*,s_ID,i_ID] 126 | Table classroom, columns = [*,building,room_number,capacity] 127 | Table course, columns = [*,course_id,title,dept_name,credits] 128 | Table department, columns = [*,dept_name,building,budget] 129 | Table instructor, columns = [*,ID,name,dept_name,salary] 130 | Table prereq, columns = [*,course_id,prereq_id] 131 | Table section, columns = [*,course_id,sec_id,semester,year,building,room_number,time_slot_id] 132 | Table student, columns = [*,ID,name,dept_name,tot_cred] 133 | Table takes, columns = [*,ID,course_id,sec_id,semester,year,grade] 134 | Table teaches, columns = [*,ID,course_id,sec_id,semester,year] 135 | Table time_slot, columns = [*,time_slot_id,day,start_hr,start_min,end_hr,end_min] 136 | Foreign_keys = [course.dept_name = department.dept_name,instructor.dept_name = department.dept_name,section.building = classroom.building,section.room_number = classroom.room_number,section.course_id = course.course_id,teaches.ID = instructor.ID,teaches.course_id = section.course_id,teaches.sec_id = section.sec_id,teaches.semester = section.semester,teaches.year = section.year,student.dept_name = department.dept_name,takes.ID = student.ID,takes.course_id = section.course_id,takes.sec_id = section.sec_id,takes.semester = section.semester,takes.year = section.year,advisor.s_ID = student.ID,advisor.i_ID = instructor.ID,prereq.prereq_id = course.course_id,prereq.course_id = course.course_id] 137 | Q: "Find the id of instructors who taught a class in Fall 2009 but not in Spring 2010." 138 | A: Let’s think step by step. 
In the question "Find the id of instructors who taught a class in Fall 2009 but not in Spring 2010.", we are asked: 139 | "id of instructors who taught " so we need column = [teaches.id] 140 | "taught a class in" so we need column = [teaches.semester,teaches.year] 141 | Based on the columns and tables, we need these Foreign_keys = []. 142 | Based on the tables, columns, and Foreign_keys, The set of possible cell values are = [Fall,2009,Spring,2010]. So the Schema_links are: 143 | Schema_links: {{schema_links_start}}[teaches.id,teaches.semester,teaches.year,Fall,2009,Spring,2010]{{schema_links_end}}{{ example_tag_end }} 144 | 145 | {{ example_tag_start }} 146 | Table Accounts, columns = [*,account_id,customer_id,date_account_opened,account_name,other_account_details] 147 | Table Customers, columns = [*,customer_id,customer_first_name,customer_middle_initial,customer_last_name,gender,email_address,login_name,login_password,phone_number,town_city,state_county_province,country] 148 | Table Financial_Transactions, columns = [*,transaction_id,account_id,invoice_number,transaction_type,transaction_date,transaction_amount,transaction_comment,other_transaction_details] 149 | Table Invoice_Line_Items, columns = [*,order_item_id,invoice_number,product_id,product_title,product_quantity,product_price,derived_product_cost,derived_vat_payable,derived_total_cost] 150 | Table Invoices, columns = [*,invoice_number,order_id,invoice_date] 151 | Table Order_Items, columns = [*,order_item_id,order_id,product_id,product_quantity,other_order_item_details] 152 | Table Orders, columns = [*,order_id,customer_id,date_order_placed,order_details] 153 | Table Product_Categories, columns = [*,production_type_code,product_type_description,vat_rating] 154 | Table Products, columns = [*,product_id,parent_product_id,production_type_code,unit_price,product_name,product_color,product_size] 155 | Foreign_keys = [Orders.customer_id = Customers.customer_id,Invoices.order_id = 
Orders.order_id,Accounts.customer_id = Customers.customer_id,Products.production_type_code = Product_Categories.production_type_code,Financial_Transactions.account_id = Accounts.account_id,Financial_Transactions.invoice_number = Invoices.invoice_number,Order_Items.order_id = Orders.order_id,Order_Items.product_id = Products.product_id,Invoice_Line_Items.product_id = Products.product_id,Invoice_Line_Items.invoice_number = Invoices.invoice_number,Invoice_Line_Items.order_item_id = Order_Items.order_item_id] 156 | Q: "Show the id, the date of account opened, the account name, and other account detail for all accounts." 157 | A: Let’s think step by step. In the question "Show the id, the date of account opened, the account name, and other account detail for all accounts.", we are asked: 158 | "the id, the date of account opened, the account name, and other account detail for all accounts." so we need column = [Accounts.account_id,Accounts.account_name,Accounts.other_account_details,Accounts.date_account_opened] 159 | Based on the columns and tables, we need these Foreign_keys = []. 160 | Based on the tables, columns, and Foreign_keys, The set of possible cell values are = []. 
So the Schema_links are: 161 | Schema_links: {{schema_links_start}}[Accounts.account_id,Accounts.account_name,Accounts.other_account_details,Accounts.date_account_opened]{{schema_links_end}}{{ example_tag_end }} 162 | 163 | {{ example_tag_start }} 164 | Table city, columns = [*,City_ID,Official_Name,Status,Area_km_2,Population,Census_Ranking] 165 | Table competition_record, columns = [*,Competition_ID,Farm_ID,Rank] 166 | Table farm, columns = [*,Farm_ID,Year,Total_Horses,Working_Horses,Total_Cattle,Oxen,Bulls,Cows,Pigs,Sheep_and_Goats] 167 | Table farm_competition, columns = [*,Competition_ID,Year,Theme,Host_city_ID,Hosts] 168 | Foreign_keys = [farm_competition.Host_city_ID = city.City_ID,competition_record.Farm_ID = farm.Farm_ID,competition_record.Competition_ID = farm_competition.Competition_ID] 169 | Q: "Show the status shared by cities with population bigger than 1500 and smaller than 500." 170 | A: Let’s think step by step. In the question "Show the status shared by cities with population bigger than 1500 and smaller than 500.", we are asked: 171 | "the status shared by cities" so we need column = [city.Status] 172 | "cities with population" so we need column = [city.Population] 173 | Based on the columns and tables, we need these Foreign_keys = []. 174 | Based on the tables, columns, and Foreign_keys, The set of possible cell values are = [1500,500]. 
So the Schema_links are: 175 | Schema_links: {{schema_links_start}}[city.Status,city.Population,1500,500]{{schema_links_end}}{{ example_tag_end }} 176 | 177 | {{ example_tag_start }} 178 | {{ fields }} 179 | Foreign_keys = {{ foreign_keys }} 180 | Q: "{{ test_sample_text }}" -------------------------------------------------------------------------------- /module_1/01_single-table-optimized-for-latency.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "0621f124-5629-46b5-a4b9-32d008c43493", 6 | "metadata": {}, 7 | "source": [ 8 | "## Text-to-SQL on a biomedical dataset, optimized for low latency on a single-table\n", 9 | "---\n", 10 | "We show here how to build a conversational chatbot that is able to extract information from a relational database with a single table. This is a relatively simple example of text-to-SQL, as there are no joins required. We focus here on showing how to optimize latency using the [SQLDatabaseToolkit](https://python.langchain.com/v0.2/docs/integrations/toolkits/sql_database/) from [LangChain](https://www.langchain.com).\n", 11 | "\n", 12 | "In the generic case, SQLDatabaseToolkit uses the ReAct framework to make multiple calls to the LLM: to ask the database what tables it contains, to ask the database for the schema of a subset of those tables, to test a possible SQL query, to run a query, and more. 
Given that we know the database has only one table we can make fewer calls to the LLM and hence reduce the latency of the overall text-to-SQL process.\n", 13 | "\n", 14 | "We use the following database of diabetes patients, which has been downloaded for you as the file `diabetes.csv`:\n", 15 | "```\n", 16 | "@article{Machado2024,\n", 17 | " author = \"Angela Machado\",\n", 18 | " title = \"{diabetes.csv}\",\n", 19 | " year = \"2024\",\n", 20 | " month = \"3\",\n", 21 | " url = \"https://figshare.com/articles/dataset/diabetes_csv/25421347\",\n", 22 | " doi = \"10.6084/m9.figshare.25421347.v1\"\n", 23 | "}\n", 24 | "```\n", 25 | "\n", 26 | "Note that the following `pip install` commands may generate warnings: you can safely ignore these." 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "id": "fe41174b-9c71-47ac-b53e-aa0161241dda", 33 | "metadata": { 34 | "tags": [] 35 | }, 36 | "outputs": [], 37 | "source": [ 38 | "%pip install -qU openpyxl langchain boto3\n", 39 | "%pip install -qU langchain-community langchain-aws" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "id": "390a3512-34dd-4488-8e94-efb0ef48b7b3", 46 | "metadata": { 47 | "tags": [] 48 | }, 49 | "outputs": [], 50 | "source": [ 51 | "import os\n", 52 | "import sys\n", 53 | "from typing import List, Tuple\n", 54 | "import itertools\n", 55 | "from time import time\n", 56 | "\n", 57 | "import jinja2\n", 58 | "from langchain_community.utilities import SQLDatabase\n", 59 | "import sqlite3\n", 60 | "import boto3\n", 61 | "import pandas as pd\n", 62 | "from langchain_aws import ChatBedrock\n", 63 | "from langchain_community.agent_toolkits.sql.base import create_sql_agent\n", 64 | "from langchain_community.agent_toolkits.sql.toolkit import SQLDatabaseToolkit\n", 65 | "from langchain.agents.agent_types import AgentType\n", 66 | "from langchain.chains import create_sql_query_chain\n", 67 | "from langchain_core.prompts import PromptTemplate\n", 68 | 
"from langchain.callbacks.base import BaseCallbackHandler\n", 69 | "\n", 70 | "sys.path.append('../')\n", 71 | "import utilities as u" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "id": "dd87d970-10f2-4e18-a487-21e59dc44a65", 78 | "metadata": { 79 | "tags": [] 80 | }, 81 | "outputs": [], 82 | "source": [ 83 | "model_id = \"anthropic.claude-3-sonnet-20240229-v1:0\"\n", 84 | "# model_id = \"anthropic.claude-3-haiku-20240307-v1:0\"\n", 85 | "con = sqlite3.connect(\"test.db\")\n", 86 | "jenv = jinja2.Environment(trim_blocks=True, lstrip_blocks=True)\n", 87 | "# This is a useful way to keep track of tool invocations:\n", 88 | "#os.environ[\"LANGCHAIN_TRACING_V2\"] = \"true\"\n", 89 | "#os.environ[\"LANGCHAIN_API_KEY\"] = \"...\"\n", 90 | "os.environ[\"AWS_DEFAULT_REGION\"] = \"us-west-2\"\n", 91 | "\n", 92 | "is_conversational = True\n", 93 | "force_setup_db = False\n", 94 | "do_few_shot_prompting = False\n", 95 | "show_SQL = True\n", 96 | "\n", 97 | "llm = ChatBedrock(model_id=model_id, region_name=\"us-west-2\")\n", 98 | "db = SQLDatabase.from_uri(\"sqlite:///test.db\")\n", 99 | "context = db.get_context()\n", 100 | "chain = create_sql_query_chain(llm, db)" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "id": "4c2f56ad-56de-4a56-bd70-36b6ca1ae7cf", 106 | "metadata": {}, 107 | "source": [ 108 | "### Load the test data into a database\n", 109 | "\n", 110 | "First, we load the CSV file into a DataFrame and take a look at some rows:" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": null, 116 | "id": "9337b4ba-3ab5-4968-a5a8-7e1050a57c27", 117 | "metadata": { 118 | "tags": [] 119 | }, 120 | "outputs": [], 121 | "source": [ 122 | "df = pd.read_csv(\"diabetes.csv\")\n", 123 | "df.head()" 124 | ] 125 | }, 126 | { 127 | "cell_type": "markdown", 128 | "id": "e278403a-6731-48c0-ad70-e84d4f6f1b45", 129 | "metadata": {}, 130 | "source": [ 131 | "Next, we load this data into a SQLite database:" 132 | ] 
133 | }, 134 | { 135 | "cell_type": "code", 136 | "execution_count": null, 137 | "id": "fc3d369b-fda3-4399-b07b-284bc2cde2e3", 138 | "metadata": { 139 | "tags": [] 140 | }, 141 | "outputs": [], 142 | "source": [ 143 | "def setup_db():\n", 144 | " print(\"Setting up DB\")\n", 145 | " df.to_sql(name=\"patients\", con=con, if_exists=\"replace\", index=True)\n", 146 | " con.commit()" 147 | ] 148 | }, 149 | { 150 | "cell_type": "code", 151 | "execution_count": null, 152 | "id": "3e801b91-36b5-439d-bc73-3a4a7eaa2c70", 153 | "metadata": { 154 | "tags": [] 155 | }, 156 | "outputs": [], 157 | "source": [ 158 | "def maybe_setup_db():\n", 159 | " if force_setup_db:\n", 160 | " print(\"Forcing DB setup\")\n", 161 | " setup_db()\n", 162 | " else:\n", 163 | " try:\n", 164 | " cur = con.cursor()\n", 165 | " cur.execute(\"SELECT count(*) FROM patients\")\n", 166 | " print(f\"Table exists ({cur.fetchone()[0]}), no need to recreate DB\")\n", 167 | " except Exception as ex:\n", 168 | " # print(f\"Caught: {ex}\")\n", 169 | " cur.close()\n", 170 | " if \"no such table: patients\" in str(ex):\n", 171 | " print(f\"Table not there, need to recreate DB\")\n", 172 | " setup_db()\n", 173 | " else:\n", 174 | " raise ex" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "id": "8638e1a5-cce4-4f75-9947-965787a45c6d", 181 | "metadata": { 182 | "tags": [] 183 | }, 184 | "outputs": [], 185 | "source": [ 186 | "maybe_setup_db()" 187 | ] 188 | }, 189 | { 190 | "cell_type": "markdown", 191 | "id": "bdfc11a5-26cf-43da-9a9b-bb858069a845", 192 | "metadata": {}, 193 | "source": [ 194 | "### In order to make the chatbot conversational, we need to de-contextualize questions\n", 195 | "\n", 196 | "For example, if the first question is \"How many patients are over 30?\" and the second question is \"And how many of those have a BMI > 30?\" then we need to rewrite the second question to replace \"those\" with an appropriate referent. 
For example, we could rewrite the question as \"How many patients that are over 30 also have a BMI > 30?\"" 197 | ] 198 | }, 199 | { 200 | "cell_type": "code", 201 | "execution_count": null, 202 | "id": "50e7c8ff-2e06-49b0-85b0-3d9644b6aa92", 203 | "metadata": { 204 | "tags": [] 205 | }, 206 | "outputs": [], 207 | "source": [ 208 | "def decontextualize_question(question: str, messages: List[List[str]]) -> str:\n", 209 | " \"\"\"\n", 210 | " Each message is a list of [question, answer].\n", 211 | " \"\"\"\n", 212 | " print(f\"decontextualize_question {question} {messages}\")\n", 213 | " prompt_template = \"\"\"\n", 214 | "I am going to give you a history of questions and answers, followed by a new question.\n", 215 | "I want you to rewrite the new question so that it stands alone, not needing the\n", 216 | "historical context to make sense.\n", 217 | "\n", 218 | "\n", 219 | "{% for x in history %}\n", 220 | " {{ x[0] }}\n", 221 | " {{ x[1] }}\n", 222 | "{% endfor %}\n", 223 | "\n", 224 | "\n", 225 | "Here is the new question:\n", 226 | "\n", 227 | "{{question}}\n", 228 | "\n", 229 | "\n", 230 | "You must make the absolute MINIMUM changes required to make the meaning of\n", 231 | "the sentence clear without the context of the history. 
Make NO other changes.\n", 232 | "\n", 233 | "Return the rewritten, standalone question in <result></result> tags.\n", 234 | "\"\"\"\n", 235 | " prompt = jenv.from_string(prompt_template).render(history=messages, question=question)\n", 236 | " # print(f\"prompt:\\n{prompt}\\n-----\")\n", 237 | " response = llm.invoke(prompt)\n", 238 | " # print(f\"response:\\n{response}\\n--------\")\n", 239 | " answer = u.extract_tag(response.content, \"result\")[0]\n", 240 | " # print(f\"answer: <<{answer}>>\")\n", 241 | " return answer" 242 | ] 243 | }, 244 | { 245 | "cell_type": "markdown", 246 | "id": "851c491e-1002-4eb3-8994-e96c186a82de", 247 | "metadata": {}, 248 | "source": [ 249 | "Extract the `CREATE TABLE` statement from the database and store it away so we can later insert it into the prompt." 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": null, 255 | "id": "1802de28-418f-450b-9788-23dd89ab992a", 256 | "metadata": { 257 | "tags": [] 258 | }, 259 | "outputs": [], 260 | "source": [ 261 | "cur = con.cursor()\n", 262 | "cur.execute(\"SELECT * FROM sqlite_master\")\n", 263 | "DDL = cur.fetchone()[4]\n", 264 | "print(DDL)" 265 | ] 266 | }, 267 | { 268 | "cell_type": "markdown", 269 | "id": "5c251f06-4540-4f09-b453-8215733cfcc3", 270 | "metadata": {}, 271 | "source": [ 272 | "We use an instance of `BaseCallbackHandler` to introspect on the sequence of LLM calls (tool invocations) so\n", 273 | "we can later report on useful information about this tool chain like the SQL generated and the number of tool invocations." 
274 | ] 275 | }, 276 | { 277 | "cell_type": "code", 278 | "execution_count": null, 279 | "id": "f5d7354d-917d-4673-9c55-61c21572fe9c", 280 | "metadata": { 281 | "tags": [] 282 | }, 283 | "outputs": [], 284 | "source": [ 285 | "class SQLHandler(BaseCallbackHandler):\n", 286 | " def __init__(self):\n", 287 | " self._sql_result = []\n", 288 | " self._num_tool_actions = 0\n", 289 | "\n", 290 | " def on_agent_action(self, action, **kwargs):\n", 291 | " \"\"\"Runs on agent action. If the tool being used is sql_db_query_checker or sql_db_query,\n", 292 | " it means we're submitting the sql and we can \n", 293 | " record it as a candidate for the final sql\n", 294 | " \"\"\"\n", 295 | " self._num_tool_actions += 1\n", 296 | " if action.tool in [\"sql_db_query_checker\", \"sql_db_query\"]:\n", 297 | " self._sql_result.append(action.tool_input)\n", 298 | "\n", 299 | " def sql_results(self) -> List[str]:\n", 300 | " return self._sql_result\n", 301 | "\n", 302 | " def num_tool_actions(self) -> int:\n", 303 | " return self._num_tool_actions" 304 | ] 305 | }, 306 | { 307 | "cell_type": "markdown", 308 | "id": "53376a82-4bfb-40aa-a4b8-b7c3c08c0795", 309 | "metadata": {}, 310 | "source": [ 311 | "We can optionally provide notes or hints about the schema to help guide the model towards generating more accurate\n", 312 | "SQL. In this case the schema is straightforward so we haven't needed to add any notes, but you can experiment with adding \n", 313 | "some in here." 314 | ] 315 | }, 316 | { 317 | "cell_type": "code", 318 | "execution_count": null, 319 | "id": "c5ce4db1-3c19-4d78-871a-dcac27e302c5", 320 | "metadata": { 321 | "tags": [] 322 | }, 323 | "outputs": [], 324 | "source": [ 325 | "notes: List[str] = []" 326 | ] 327 | }, 328 | { 329 | "cell_type": "markdown", 330 | "id": "2751163c-54e6-436c-8012-0dd3bc252f57", 331 | "metadata": {}, 332 | "source": [ 333 | "The following is the main prompt that we use to direct the [ReAct](https://arxiv.org/pdf/2210.03629) workflow. 
Typically this agentic workflow would use the tools sql_db_schema and sql_db_list_tables to extract metadata (the schema) from the database. This requires extra LLM inferences that increase the latency of the overall agentic workflow. Here we both explicitly provide the table name and `CREATE TABLE` statement and also tell the LLM to not call these tools." 334 | ] 335 | }, 336 | { 337 | "cell_type": "code", 338 | "execution_count": null, 339 | "id": "b24b8f44-9f92-447a-bb4d-27e3b337f1fe", 340 | "metadata": { 341 | "tags": [] 342 | }, 343 | "outputs": [], 344 | "source": [ 345 | "prompt_template = '''\n", 346 | "Answer the following questions as best you can.\n", 347 | "\n", 348 | "You have access to the following tools:\n", 349 | "\n", 350 | "{tools}\n", 351 | "\n", 352 | "Use the following format:\n", 353 | "\n", 354 | "Question: the input question you must answer\n", 355 | "Thought: you should always think about what to do\n", 356 | "Action: the action to take, should be one of [{tool_names}]\n", 357 | "Action Input: the input to the action\n", 358 | "Observation: the result of the action\n", 359 | "... (this Thought/Action/Action Input/Observation can repeat N times)\n", 360 | "Thought: I now know the final answer\n", 361 | "Final Answer: the final answer to the original input question\n", 362 | "\n", 363 | "You might find the following tips useful:\n", 364 | "{% for tip in tips %}\n", 365 | " - {{ tip }}\n", 366 | "{% endfor %}\n", 367 | "\n", 368 | "The database has the following single table:\n", 369 | "\n", 370 | "{{ table_info }}\n", 371 | "\n", 372 | "You should NEVER have to use either the sql_db_schema tool or the sql_db_list_tables tool\n", 373 | "as you know the only table is the \"patients\" table and you know its schema.\n", 374 | "\n", 375 | "You can NEVER produce a SELECT statement with no LIMIT clause. 
You should always have an ORDER BY\n", 376 | "clause and a \"LIMIT 20\" to avoid returning too many useless results.\n", 377 | "\n", 378 | "When describing the final result you don't have to describe HOW the SQL statement worked,\n", 379 | "just describe the results.\n", 380 | "\n", 381 | "Begin!\n", 382 | "\n", 383 | "Question: {input}\n", 384 | "Thought: {agent_scratchpad}'''" 385 | ] 386 | }, 387 | { 388 | "cell_type": "code", 389 | "execution_count": null, 390 | "id": "0d84a534-7f6b-4fd4-9237-02144728da1f", 391 | "metadata": { 392 | "tags": [] 393 | }, 394 | "outputs": [], 395 | "source": [ 396 | "def create_prompt(notes, DDL, question: str):\n", 397 | " prompt_0 = jenv.from_string(prompt_template).render(tips=notes,\n", 398 | " table_info=DDL)\n", 399 | " prompt = PromptTemplate.from_template(prompt_0)\n", 400 | " return prompt" 401 | ] 402 | }, 403 | { 404 | "cell_type": "markdown", 405 | "id": "8ff6edfa-5020-4f9d-a827-08cf89a1a22f", 406 | "metadata": {}, 407 | "source": [ 408 | "## Answering questions\n", 409 | "\n", 410 | "Below we provide two functions, `answer_standalone_question` and `answer_multiple_questions`, that you can use to drive a chatbot. While the interaction here is admittedly crude, you can easily take these functions and plug them into a framework such as [gradio's ChatBot](https://www.gradio.app/docs/gradio/chatbot) to create a more sophisticated UX." 
411 | ] 412 | }, 413 | { 414 | "cell_type": "code", 415 | "execution_count": null, 416 | "id": "1b00b89e-247f-4920-904b-1ef2a912e475", 417 | "metadata": { 418 | "tags": [] 419 | }, 420 | "outputs": [], 421 | "source": [ 422 | "def answer_standalone_question(question: str,\n", 423 | " messages: List[List[str]]) -> Tuple[str, str]:\n", 424 | " start_time: float = time()\n", 425 | " if is_conversational and messages:\n", 426 | " question = decontextualize_question(question, messages)\n", 427 | " handler = SQLHandler()\n", 428 | " try:\n", 429 | " agent_executor = create_sql_agent(\n", 430 | " llm=llm,\n", 431 | " toolkit=SQLDatabaseToolkit(db=db, llm=llm),\n", 432 | " verbose=True,\n", 433 | " prompt=create_prompt(notes, DDL, question),\n", 434 | " agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,\n", 435 | " callbacks=[handler],\n", 436 | " handle_parsing_errors=True)\n", 437 | " for iteration in itertools.count(0):\n", 438 | " try:\n", 439 | " answer = agent_executor.invoke(input={\"input\": question},\n", 440 | " config={\"callbacks\": [handler]})\n", 441 | " duration = time() - start_time\n", 442 | " iter_str = f\", {iteration} iterations\" if iteration > 1 else \"\"\n", 443 | " history_str = f\", history {len(messages):,}\" if len(messages) > 0 else \"\"\n", 444 | " sql_result = handler.sql_results()[-1].strip() if len(handler.sql_results()) > 0\\\n", 445 | " else None\n", 446 | " print(f\"sql_result: {sql_result}\")\n", 447 | " SQL_str = f\"\\n ```{sql_result}```\" if show_SQL and sql_result else \"\"\n", 448 | " return answer['output'],\\\n", 449 | " f\"{duration:.1f} secs, {handler.num_tool_actions():,} actions{iter_str}{history_str} {SQL_str}\"\n", 450 | " except ValueError as ex:\n", 451 | " if iteration < 10:\n", 452 | " print(f\"iteration #{iteration}: caught {ex}\")\n", 453 | " print(\"retrying\")\n", 454 | " else:\n", 455 | " raise ex\n", 456 | " except Exception as ex:\n", 457 | " print(f\"Caught: {ex}\")\n", 458 | " raise ex" 459 | ] 460 | }, 461 | { 462 | 
"cell_type": "code", 463 | "execution_count": null, 464 | "id": "71014cc5-480f-440b-9045-ab562aeaec15", 465 | "metadata": { 466 | "tags": [] 467 | }, 468 | "outputs": [], 469 | "source": [ 470 | "def answer_multiple_questions(questions: List[str]) -> List[Tuple[str, str]]:\n", 471 | " messages: List[Tuple[str, str]] = []\n", 472 | " answers: List[str] = []\n", 473 | " for question in questions:\n", 474 | " answer, extra_info = answer_standalone_question(question, messages)\n", 475 | " answers.append(answer)\n", 476 | " messages.append([question, answer])\n", 477 | " return list(zip(questions, answers))" 478 | ] 479 | }, 480 | { 481 | "cell_type": "markdown", 482 | "id": "022ff83b", 483 | "metadata": {}, 484 | "source": [ 485 | "If when executing the next cell you see this error:\n", 486 | "\n", 487 | "![model access error](content/model-access-error.png)\n", 488 | "\n", 489 | "then you need to go to the Bedrock web console and request model access." 490 | ] 491 | }, 492 | { 493 | "cell_type": "code", 494 | "execution_count": null, 495 | "id": "25bce403-9166-4104-affa-82fca1ea3202", 496 | "metadata": { 497 | "tags": [] 498 | }, 499 | "outputs": [], 500 | "source": [ 501 | "answer_standalone_question(\"How many patients have a BMI over 20 and are older than 30?\",\n", 502 | " [])" 503 | ] 504 | }, 505 | { 506 | "cell_type": "code", 507 | "execution_count": null, 508 | "id": "5a7ad365-853e-4636-9584-4e6592f7eee7", 509 | "metadata": {}, 510 | "outputs": [], 511 | "source": [ 512 | "answer_multiple_questions(\n", 513 | " [\"How many patients have a BMI over 20 and are older than 30?\",\n", 514 | " \"How many are over 50?\"])" 515 | ] 516 | }, 517 | { 518 | "cell_type": "code", 519 | "execution_count": null, 520 | "id": "c6a93743-d92d-4992-b647-6a1b3e640532", 521 | "metadata": {}, 522 | "outputs": [], 523 | "source": [] 524 | } 525 | ], 526 | "metadata": { 527 | "kernelspec": { 528 | "display_name": "Python 3 (ipykernel)", 529 | "language": "python", 530 | "name": 
"python3" 531 | }, 532 | "language_info": { 533 | "codemirror_mode": { 534 | "name": "ipython", 535 | "version": 3 536 | }, 537 | "file_extension": ".py", 538 | "mimetype": "text/x-python", 539 | "name": "python", 540 | "nbconvert_exporter": "python", 541 | "pygments_lexer": "ipython3", 542 | "version": "3.10.14" 543 | } 544 | }, 545 | "nbformat": 4, 546 | "nbformat_minor": 5 547 | } 548 | -------------------------------------------------------------------------------- /module_1/content/model-access-error.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-samples/text-to-sql-bedrock-workshop/8c6844c5a268092c73516a016353c07fe1f146b0/module_1/content/model-access-error.png -------------------------------------------------------------------------------- /module_1/diabetes.csv: -------------------------------------------------------------------------------- 1 | Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome 2 | 6,148,72,35,0,33.6,0.627,50,1 3 | 1,85,66,29,0,26.6,0.351,31,0 4 | 8,183,64,0,0,23.3,0.672,32,1 5 | 1,89,66,23,94,28.1,0.167,21,0 6 | 0,137,40,35,168,43.1,2.288,33,1 7 | 5,116,74,0,0,25.6,0.201,30,0 8 | 3,78,50,32,88,31,0.248,26,1 9 | 10,115,0,0,0,35.3,0.134,29,0 10 | 2,197,70,45,543,30.5,0.158,53,1 11 | 8,125,96,0,0,0,0.232,54,1 12 | 4,110,92,0,0,37.6,0.191,30,0 13 | 10,168,74,0,0,38,0.537,34,1 14 | 10,139,80,0,0,27.1,1.441,57,0 15 | 1,189,60,23,846,30.1,0.398,59,1 16 | 5,166,72,19,175,25.8,0.587,51,1 17 | 7,100,0,0,0,30,0.484,32,1 18 | 0,118,84,47,230,45.8,0.551,31,1 19 | 7,107,74,0,0,29.6,0.254,31,1 20 | 1,103,30,38,83,43.3,0.183,33,0 21 | 1,115,70,30,96,34.6,0.529,32,1 22 | 3,126,88,41,235,39.3,0.704,27,0 23 | 8,99,84,0,0,35.4,0.388,50,0 24 | 7,196,90,0,0,39.8,0.451,41,1 25 | 9,119,80,35,0,29,0.263,29,1 26 | 11,143,94,33,146,36.6,0.254,51,1 27 | 10,125,70,26,115,31.1,0.205,41,1 28 | 7,147,76,0,0,39.4,0.257,43,1 29 | 
1,97,66,15,140,23.2,0.487,22,0 30 | 13,145,82,19,110,22.2,0.245,57,0 31 | 5,117,92,0,0,34.1,0.337,38,0 32 | 5,109,75,26,0,36,0.546,60,0 33 | 3,158,76,36,245,31.6,0.851,28,1 34 | 3,88,58,11,54,24.8,0.267,22,0 35 | 6,92,92,0,0,19.9,0.188,28,0 36 | 10,122,78,31,0,27.6,0.512,45,0 37 | 4,103,60,33,192,24,0.966,33,0 38 | 11,138,76,0,0,33.2,0.42,35,0 39 | 9,102,76,37,0,32.9,0.665,46,1 40 | 2,90,68,42,0,38.2,0.503,27,1 41 | 4,111,72,47,207,37.1,1.39,56,1 42 | 3,180,64,25,70,34,0.271,26,0 43 | 7,133,84,0,0,40.2,0.696,37,0 44 | 7,106,92,18,0,22.7,0.235,48,0 45 | 9,171,110,24,240,45.4,0.721,54,1 46 | 7,159,64,0,0,27.4,0.294,40,0 47 | 0,180,66,39,0,42,1.893,25,1 48 | 1,146,56,0,0,29.7,0.564,29,0 49 | 2,71,70,27,0,28,0.586,22,0 50 | 7,103,66,32,0,39.1,0.344,31,1 51 | 7,105,0,0,0,0,0.305,24,0 52 | 1,103,80,11,82,19.4,0.491,22,0 53 | 1,101,50,15,36,24.2,0.526,26,0 54 | 5,88,66,21,23,24.4,0.342,30,0 55 | 8,176,90,34,300,33.7,0.467,58,1 56 | 7,150,66,42,342,34.7,0.718,42,0 57 | 1,73,50,10,0,23,0.248,21,0 58 | 7,187,68,39,304,37.7,0.254,41,1 59 | 0,100,88,60,110,46.8,0.962,31,0 60 | 0,146,82,0,0,40.5,1.781,44,0 61 | 0,105,64,41,142,41.5,0.173,22,0 62 | 2,84,0,0,0,0,0.304,21,0 63 | 8,133,72,0,0,32.9,0.27,39,1 64 | 5,44,62,0,0,25,0.587,36,0 65 | 2,141,58,34,128,25.4,0.699,24,0 66 | 7,114,66,0,0,32.8,0.258,42,1 67 | 5,99,74,27,0,29,0.203,32,0 68 | 0,109,88,30,0,32.5,0.855,38,1 69 | 2,109,92,0,0,42.7,0.845,54,0 70 | 1,95,66,13,38,19.6,0.334,25,0 71 | 4,146,85,27,100,28.9,0.189,27,0 72 | 2,100,66,20,90,32.9,0.867,28,1 73 | 5,139,64,35,140,28.6,0.411,26,0 74 | 13,126,90,0,0,43.4,0.583,42,1 75 | 4,129,86,20,270,35.1,0.231,23,0 76 | 1,79,75,30,0,32,0.396,22,0 77 | 1,0,48,20,0,24.7,0.14,22,0 78 | 7,62,78,0,0,32.6,0.391,41,0 79 | 5,95,72,33,0,37.7,0.37,27,0 80 | 0,131,0,0,0,43.2,0.27,26,1 81 | 2,112,66,22,0,25,0.307,24,0 82 | 3,113,44,13,0,22.4,0.14,22,0 83 | 2,74,0,0,0,0,0.102,22,0 84 | 7,83,78,26,71,29.3,0.767,36,0 85 | 0,101,65,28,0,24.6,0.237,22,0 86 | 5,137,108,0,0,48.8,0.227,37,1 87 | 
2,110,74,29,125,32.4,0.698,27,0 88 | 13,106,72,54,0,36.6,0.178,45,0 89 | 2,100,68,25,71,38.5,0.324,26,0 90 | 15,136,70,32,110,37.1,0.153,43,1 91 | 1,107,68,19,0,26.5,0.165,24,0 92 | 1,80,55,0,0,19.1,0.258,21,0 93 | 4,123,80,15,176,32,0.443,34,0 94 | 7,81,78,40,48,46.7,0.261,42,0 95 | 4,134,72,0,0,23.8,0.277,60,1 96 | 2,142,82,18,64,24.7,0.761,21,0 97 | 6,144,72,27,228,33.9,0.255,40,0 98 | 2,92,62,28,0,31.6,0.13,24,0 99 | 1,71,48,18,76,20.4,0.323,22,0 100 | 6,93,50,30,64,28.7,0.356,23,0 101 | 1,122,90,51,220,49.7,0.325,31,1 102 | 1,163,72,0,0,39,1.222,33,1 103 | 1,151,60,0,0,26.1,0.179,22,0 104 | 0,125,96,0,0,22.5,0.262,21,0 105 | 1,81,72,18,40,26.6,0.283,24,0 106 | 2,85,65,0,0,39.6,0.93,27,0 107 | 1,126,56,29,152,28.7,0.801,21,0 108 | 1,96,122,0,0,22.4,0.207,27,0 109 | 4,144,58,28,140,29.5,0.287,37,0 110 | 3,83,58,31,18,34.3,0.336,25,0 111 | 0,95,85,25,36,37.4,0.247,24,1 112 | 3,171,72,33,135,33.3,0.199,24,1 113 | 8,155,62,26,495,34,0.543,46,1 114 | 1,89,76,34,37,31.2,0.192,23,0 115 | 4,76,62,0,0,34,0.391,25,0 116 | 7,160,54,32,175,30.5,0.588,39,1 117 | 4,146,92,0,0,31.2,0.539,61,1 118 | 5,124,74,0,0,34,0.22,38,1 119 | 5,78,48,0,0,33.7,0.654,25,0 120 | 4,97,60,23,0,28.2,0.443,22,0 121 | 4,99,76,15,51,23.2,0.223,21,0 122 | 0,162,76,56,100,53.2,0.759,25,1 123 | 6,111,64,39,0,34.2,0.26,24,0 124 | 2,107,74,30,100,33.6,0.404,23,0 125 | 5,132,80,0,0,26.8,0.186,69,0 126 | 0,113,76,0,0,33.3,0.278,23,1 127 | 1,88,30,42,99,55,0.496,26,1 128 | 3,120,70,30,135,42.9,0.452,30,0 129 | 1,118,58,36,94,33.3,0.261,23,0 130 | 1,117,88,24,145,34.5,0.403,40,1 131 | 0,105,84,0,0,27.9,0.741,62,1 132 | 4,173,70,14,168,29.7,0.361,33,1 133 | 9,122,56,0,0,33.3,1.114,33,1 134 | 3,170,64,37,225,34.5,0.356,30,1 135 | 8,84,74,31,0,38.3,0.457,39,0 136 | 2,96,68,13,49,21.1,0.647,26,0 137 | 2,125,60,20,140,33.8,0.088,31,0 138 | 0,100,70,26,50,30.8,0.597,21,0 139 | 0,93,60,25,92,28.7,0.532,22,0 140 | 0,129,80,0,0,31.2,0.703,29,0 141 | 5,105,72,29,325,36.9,0.159,28,0 142 | 3,128,78,0,0,21.1,0.268,55,0 
143 | 5,106,82,30,0,39.5,0.286,38,0 144 | 2,108,52,26,63,32.5,0.318,22,0 145 | 10,108,66,0,0,32.4,0.272,42,1 146 | 4,154,62,31,284,32.8,0.237,23,0 147 | 0,102,75,23,0,0,0.572,21,0 148 | 9,57,80,37,0,32.8,0.096,41,0 149 | 2,106,64,35,119,30.5,1.4,34,0 150 | 5,147,78,0,0,33.7,0.218,65,0 151 | 2,90,70,17,0,27.3,0.085,22,0 152 | 1,136,74,50,204,37.4,0.399,24,0 153 | 4,114,65,0,0,21.9,0.432,37,0 154 | 9,156,86,28,155,34.3,1.189,42,1 155 | 1,153,82,42,485,40.6,0.687,23,0 156 | 8,188,78,0,0,47.9,0.137,43,1 157 | 7,152,88,44,0,50,0.337,36,1 158 | 2,99,52,15,94,24.6,0.637,21,0 159 | 1,109,56,21,135,25.2,0.833,23,0 160 | 2,88,74,19,53,29,0.229,22,0 161 | 17,163,72,41,114,40.9,0.817,47,1 162 | 4,151,90,38,0,29.7,0.294,36,0 163 | 7,102,74,40,105,37.2,0.204,45,0 164 | 0,114,80,34,285,44.2,0.167,27,0 165 | 2,100,64,23,0,29.7,0.368,21,0 166 | 0,131,88,0,0,31.6,0.743,32,1 167 | 6,104,74,18,156,29.9,0.722,41,1 168 | 3,148,66,25,0,32.5,0.256,22,0 169 | 4,120,68,0,0,29.6,0.709,34,0 170 | 4,110,66,0,0,31.9,0.471,29,0 171 | 3,111,90,12,78,28.4,0.495,29,0 172 | 6,102,82,0,0,30.8,0.18,36,1 173 | 6,134,70,23,130,35.4,0.542,29,1 174 | 2,87,0,23,0,28.9,0.773,25,0 175 | 1,79,60,42,48,43.5,0.678,23,0 176 | 2,75,64,24,55,29.7,0.37,33,0 177 | 8,179,72,42,130,32.7,0.719,36,1 178 | 6,85,78,0,0,31.2,0.382,42,0 179 | 0,129,110,46,130,67.1,0.319,26,1 180 | 5,143,78,0,0,45,0.19,47,0 181 | 5,130,82,0,0,39.1,0.956,37,1 182 | 6,87,80,0,0,23.2,0.084,32,0 183 | 0,119,64,18,92,34.9,0.725,23,0 184 | 1,0,74,20,23,27.7,0.299,21,0 185 | 5,73,60,0,0,26.8,0.268,27,0 186 | 4,141,74,0,0,27.6,0.244,40,0 187 | 7,194,68,28,0,35.9,0.745,41,1 188 | 8,181,68,36,495,30.1,0.615,60,1 189 | 1,128,98,41,58,32,1.321,33,1 190 | 8,109,76,39,114,27.9,0.64,31,1 191 | 5,139,80,35,160,31.6,0.361,25,1 192 | 3,111,62,0,0,22.6,0.142,21,0 193 | 9,123,70,44,94,33.1,0.374,40,0 194 | 7,159,66,0,0,30.4,0.383,36,1 195 | 11,135,0,0,0,52.3,0.578,40,1 196 | 8,85,55,20,0,24.4,0.136,42,0 197 | 5,158,84,41,210,39.4,0.395,29,1 198 | 
1,105,58,0,0,24.3,0.187,21,0 199 | 3,107,62,13,48,22.9,0.678,23,1 200 | 4,109,64,44,99,34.8,0.905,26,1 201 | 4,148,60,27,318,30.9,0.15,29,1 202 | 0,113,80,16,0,31,0.874,21,0 203 | 1,138,82,0,0,40.1,0.236,28,0 204 | 0,108,68,20,0,27.3,0.787,32,0 205 | 2,99,70,16,44,20.4,0.235,27,0 206 | 6,103,72,32,190,37.7,0.324,55,0 207 | 5,111,72,28,0,23.9,0.407,27,0 208 | 8,196,76,29,280,37.5,0.605,57,1 209 | 5,162,104,0,0,37.7,0.151,52,1 210 | 1,96,64,27,87,33.2,0.289,21,0 211 | 7,184,84,33,0,35.5,0.355,41,1 212 | 2,81,60,22,0,27.7,0.29,25,0 213 | 0,147,85,54,0,42.8,0.375,24,0 214 | 7,179,95,31,0,34.2,0.164,60,0 215 | 0,140,65,26,130,42.6,0.431,24,1 216 | 9,112,82,32,175,34.2,0.26,36,1 217 | 12,151,70,40,271,41.8,0.742,38,1 218 | 5,109,62,41,129,35.8,0.514,25,1 219 | 6,125,68,30,120,30,0.464,32,0 220 | 5,85,74,22,0,29,1.224,32,1 221 | 5,112,66,0,0,37.8,0.261,41,1 222 | 0,177,60,29,478,34.6,1.072,21,1 223 | 2,158,90,0,0,31.6,0.805,66,1 224 | 7,119,0,0,0,25.2,0.209,37,0 225 | 7,142,60,33,190,28.8,0.687,61,0 226 | 1,100,66,15,56,23.6,0.666,26,0 227 | 1,87,78,27,32,34.6,0.101,22,0 228 | 0,101,76,0,0,35.7,0.198,26,0 229 | 3,162,52,38,0,37.2,0.652,24,1 230 | 4,197,70,39,744,36.7,2.329,31,0 231 | 0,117,80,31,53,45.2,0.089,24,0 232 | 4,142,86,0,0,44,0.645,22,1 233 | 6,134,80,37,370,46.2,0.238,46,1 234 | 1,79,80,25,37,25.4,0.583,22,0 235 | 4,122,68,0,0,35,0.394,29,0 236 | 3,74,68,28,45,29.7,0.293,23,0 237 | 4,171,72,0,0,43.6,0.479,26,1 238 | 7,181,84,21,192,35.9,0.586,51,1 239 | 0,179,90,27,0,44.1,0.686,23,1 240 | 9,164,84,21,0,30.8,0.831,32,1 241 | 0,104,76,0,0,18.4,0.582,27,0 242 | 1,91,64,24,0,29.2,0.192,21,0 243 | 4,91,70,32,88,33.1,0.446,22,0 244 | 3,139,54,0,0,25.6,0.402,22,1 245 | 6,119,50,22,176,27.1,1.318,33,1 246 | 2,146,76,35,194,38.2,0.329,29,0 247 | 9,184,85,15,0,30,1.213,49,1 248 | 10,122,68,0,0,31.2,0.258,41,0 249 | 0,165,90,33,680,52.3,0.427,23,0 250 | 9,124,70,33,402,35.4,0.282,34,0 251 | 1,111,86,19,0,30.1,0.143,23,0 252 | 9,106,52,0,0,31.2,0.38,42,0 253 | 
2,129,84,0,0,28,0.284,27,0 254 | 2,90,80,14,55,24.4,0.249,24,0 255 | 0,86,68,32,0,35.8,0.238,25,0 256 | 12,92,62,7,258,27.6,0.926,44,1 257 | 1,113,64,35,0,33.6,0.543,21,1 258 | 3,111,56,39,0,30.1,0.557,30,0 259 | 2,114,68,22,0,28.7,0.092,25,0 260 | 1,193,50,16,375,25.9,0.655,24,0 261 | 11,155,76,28,150,33.3,1.353,51,1 262 | 3,191,68,15,130,30.9,0.299,34,0 263 | 3,141,0,0,0,30,0.761,27,1 264 | 4,95,70,32,0,32.1,0.612,24,0 265 | 3,142,80,15,0,32.4,0.2,63,0 266 | 4,123,62,0,0,32,0.226,35,1 267 | 5,96,74,18,67,33.6,0.997,43,0 268 | 0,138,0,0,0,36.3,0.933,25,1 269 | 2,128,64,42,0,40,1.101,24,0 270 | 0,102,52,0,0,25.1,0.078,21,0 271 | 2,146,0,0,0,27.5,0.24,28,1 272 | 10,101,86,37,0,45.6,1.136,38,1 273 | 2,108,62,32,56,25.2,0.128,21,0 274 | 3,122,78,0,0,23,0.254,40,0 275 | 1,71,78,50,45,33.2,0.422,21,0 276 | 13,106,70,0,0,34.2,0.251,52,0 277 | 2,100,70,52,57,40.5,0.677,25,0 278 | 7,106,60,24,0,26.5,0.296,29,1 279 | 0,104,64,23,116,27.8,0.454,23,0 280 | 5,114,74,0,0,24.9,0.744,57,0 281 | 2,108,62,10,278,25.3,0.881,22,0 282 | 0,146,70,0,0,37.9,0.334,28,1 283 | 10,129,76,28,122,35.9,0.28,39,0 284 | 7,133,88,15,155,32.4,0.262,37,0 285 | 7,161,86,0,0,30.4,0.165,47,1 286 | 2,108,80,0,0,27,0.259,52,1 287 | 7,136,74,26,135,26,0.647,51,0 288 | 5,155,84,44,545,38.7,0.619,34,0 289 | 1,119,86,39,220,45.6,0.808,29,1 290 | 4,96,56,17,49,20.8,0.34,26,0 291 | 5,108,72,43,75,36.1,0.263,33,0 292 | 0,78,88,29,40,36.9,0.434,21,0 293 | 0,107,62,30,74,36.6,0.757,25,1 294 | 2,128,78,37,182,43.3,1.224,31,1 295 | 1,128,48,45,194,40.5,0.613,24,1 296 | 0,161,50,0,0,21.9,0.254,65,0 297 | 6,151,62,31,120,35.5,0.692,28,0 298 | 2,146,70,38,360,28,0.337,29,1 299 | 0,126,84,29,215,30.7,0.52,24,0 300 | 14,100,78,25,184,36.6,0.412,46,1 301 | 8,112,72,0,0,23.6,0.84,58,0 302 | 0,167,0,0,0,32.3,0.839,30,1 303 | 2,144,58,33,135,31.6,0.422,25,1 304 | 5,77,82,41,42,35.8,0.156,35,0 305 | 5,115,98,0,0,52.9,0.209,28,1 306 | 3,150,76,0,0,21,0.207,37,0 307 | 2,120,76,37,105,39.7,0.215,29,0 308 | 
10,161,68,23,132,25.5,0.326,47,1 309 | 0,137,68,14,148,24.8,0.143,21,0 310 | 0,128,68,19,180,30.5,1.391,25,1 311 | 2,124,68,28,205,32.9,0.875,30,1 312 | 6,80,66,30,0,26.2,0.313,41,0 313 | 0,106,70,37,148,39.4,0.605,22,0 314 | 2,155,74,17,96,26.6,0.433,27,1 315 | 3,113,50,10,85,29.5,0.626,25,0 316 | 7,109,80,31,0,35.9,1.127,43,1 317 | 2,112,68,22,94,34.1,0.315,26,0 318 | 3,99,80,11,64,19.3,0.284,30,0 319 | 3,182,74,0,0,30.5,0.345,29,1 320 | 3,115,66,39,140,38.1,0.15,28,0 321 | 6,194,78,0,0,23.5,0.129,59,1 322 | 4,129,60,12,231,27.5,0.527,31,0 323 | 3,112,74,30,0,31.6,0.197,25,1 324 | 0,124,70,20,0,27.4,0.254,36,1 325 | 13,152,90,33,29,26.8,0.731,43,1 326 | 2,112,75,32,0,35.7,0.148,21,0 327 | 1,157,72,21,168,25.6,0.123,24,0 328 | 1,122,64,32,156,35.1,0.692,30,1 329 | 10,179,70,0,0,35.1,0.2,37,0 330 | 2,102,86,36,120,45.5,0.127,23,1 331 | 6,105,70,32,68,30.8,0.122,37,0 332 | 8,118,72,19,0,23.1,1.476,46,0 333 | 2,87,58,16,52,32.7,0.166,25,0 334 | 1,180,0,0,0,43.3,0.282,41,1 335 | 12,106,80,0,0,23.6,0.137,44,0 336 | 1,95,60,18,58,23.9,0.26,22,0 337 | 0,165,76,43,255,47.9,0.259,26,0 338 | 0,117,0,0,0,33.8,0.932,44,0 339 | 5,115,76,0,0,31.2,0.343,44,1 340 | 9,152,78,34,171,34.2,0.893,33,1 341 | 7,178,84,0,0,39.9,0.331,41,1 342 | 1,130,70,13,105,25.9,0.472,22,0 343 | 1,95,74,21,73,25.9,0.673,36,0 344 | 1,0,68,35,0,32,0.389,22,0 345 | 5,122,86,0,0,34.7,0.29,33,0 346 | 8,95,72,0,0,36.8,0.485,57,0 347 | 8,126,88,36,108,38.5,0.349,49,0 348 | 1,139,46,19,83,28.7,0.654,22,0 349 | 3,116,0,0,0,23.5,0.187,23,0 350 | 3,99,62,19,74,21.8,0.279,26,0 351 | 5,0,80,32,0,41,0.346,37,1 352 | 4,92,80,0,0,42.2,0.237,29,0 353 | 4,137,84,0,0,31.2,0.252,30,0 354 | 3,61,82,28,0,34.4,0.243,46,0 355 | 1,90,62,12,43,27.2,0.58,24,0 356 | 3,90,78,0,0,42.7,0.559,21,0 357 | 9,165,88,0,0,30.4,0.302,49,1 358 | 1,125,50,40,167,33.3,0.962,28,1 359 | 13,129,0,30,0,39.9,0.569,44,1 360 | 12,88,74,40,54,35.3,0.378,48,0 361 | 1,196,76,36,249,36.5,0.875,29,1 362 | 5,189,64,33,325,31.2,0.583,29,1 363 | 
5,158,70,0,0,29.8,0.207,63,0 364 | 5,103,108,37,0,39.2,0.305,65,0 365 | 4,146,78,0,0,38.5,0.52,67,1 366 | 4,147,74,25,293,34.9,0.385,30,0 367 | 5,99,54,28,83,34,0.499,30,0 368 | 6,124,72,0,0,27.6,0.368,29,1 369 | 0,101,64,17,0,21,0.252,21,0 370 | 3,81,86,16,66,27.5,0.306,22,0 371 | 1,133,102,28,140,32.8,0.234,45,1 372 | 3,173,82,48,465,38.4,2.137,25,1 373 | 0,118,64,23,89,0,1.731,21,0 374 | 0,84,64,22,66,35.8,0.545,21,0 375 | 2,105,58,40,94,34.9,0.225,25,0 376 | 2,122,52,43,158,36.2,0.816,28,0 377 | 12,140,82,43,325,39.2,0.528,58,1 378 | 0,98,82,15,84,25.2,0.299,22,0 379 | 1,87,60,37,75,37.2,0.509,22,0 380 | 4,156,75,0,0,48.3,0.238,32,1 381 | 0,93,100,39,72,43.4,1.021,35,0 382 | 1,107,72,30,82,30.8,0.821,24,0 383 | 0,105,68,22,0,20,0.236,22,0 384 | 1,109,60,8,182,25.4,0.947,21,0 385 | 1,90,62,18,59,25.1,1.268,25,0 386 | 1,125,70,24,110,24.3,0.221,25,0 387 | 1,119,54,13,50,22.3,0.205,24,0 388 | 5,116,74,29,0,32.3,0.66,35,1 389 | 8,105,100,36,0,43.3,0.239,45,1 390 | 5,144,82,26,285,32,0.452,58,1 391 | 3,100,68,23,81,31.6,0.949,28,0 392 | 1,100,66,29,196,32,0.444,42,0 393 | 5,166,76,0,0,45.7,0.34,27,1 394 | 1,131,64,14,415,23.7,0.389,21,0 395 | 4,116,72,12,87,22.1,0.463,37,0 396 | 4,158,78,0,0,32.9,0.803,31,1 397 | 2,127,58,24,275,27.7,1.6,25,0 398 | 3,96,56,34,115,24.7,0.944,39,0 399 | 0,131,66,40,0,34.3,0.196,22,1 400 | 3,82,70,0,0,21.1,0.389,25,0 401 | 3,193,70,31,0,34.9,0.241,25,1 402 | 4,95,64,0,0,32,0.161,31,1 403 | 6,137,61,0,0,24.2,0.151,55,0 404 | 5,136,84,41,88,35,0.286,35,1 405 | 9,72,78,25,0,31.6,0.28,38,0 406 | 5,168,64,0,0,32.9,0.135,41,1 407 | 2,123,48,32,165,42.1,0.52,26,0 408 | 4,115,72,0,0,28.9,0.376,46,1 409 | 0,101,62,0,0,21.9,0.336,25,0 410 | 8,197,74,0,0,25.9,1.191,39,1 411 | 1,172,68,49,579,42.4,0.702,28,1 412 | 6,102,90,39,0,35.7,0.674,28,0 413 | 1,112,72,30,176,34.4,0.528,25,0 414 | 1,143,84,23,310,42.4,1.076,22,0 415 | 1,143,74,22,61,26.2,0.256,21,0 416 | 0,138,60,35,167,34.6,0.534,21,1 417 | 3,173,84,33,474,35.7,0.258,22,1 418 | 
1,97,68,21,0,27.2,1.095,22,0 419 | 4,144,82,32,0,38.5,0.554,37,1 420 | 1,83,68,0,0,18.2,0.624,27,0 421 | 3,129,64,29,115,26.4,0.219,28,1 422 | 1,119,88,41,170,45.3,0.507,26,0 423 | 2,94,68,18,76,26,0.561,21,0 424 | 0,102,64,46,78,40.6,0.496,21,0 425 | 2,115,64,22,0,30.8,0.421,21,0 426 | 8,151,78,32,210,42.9,0.516,36,1 427 | 4,184,78,39,277,37,0.264,31,1 428 | 0,94,0,0,0,0,0.256,25,0 429 | 1,181,64,30,180,34.1,0.328,38,1 430 | 0,135,94,46,145,40.6,0.284,26,0 431 | 1,95,82,25,180,35,0.233,43,1 432 | 2,99,0,0,0,22.2,0.108,23,0 433 | 3,89,74,16,85,30.4,0.551,38,0 434 | 1,80,74,11,60,30,0.527,22,0 435 | 2,139,75,0,0,25.6,0.167,29,0 436 | 1,90,68,8,0,24.5,1.138,36,0 437 | 0,141,0,0,0,42.4,0.205,29,1 438 | 12,140,85,33,0,37.4,0.244,41,0 439 | 5,147,75,0,0,29.9,0.434,28,0 440 | 1,97,70,15,0,18.2,0.147,21,0 441 | 6,107,88,0,0,36.8,0.727,31,0 442 | 0,189,104,25,0,34.3,0.435,41,1 443 | 2,83,66,23,50,32.2,0.497,22,0 444 | 4,117,64,27,120,33.2,0.23,24,0 445 | 8,108,70,0,0,30.5,0.955,33,1 446 | 4,117,62,12,0,29.7,0.38,30,1 447 | 0,180,78,63,14,59.4,2.42,25,1 448 | 1,100,72,12,70,25.3,0.658,28,0 449 | 0,95,80,45,92,36.5,0.33,26,0 450 | 0,104,64,37,64,33.6,0.51,22,1 451 | 0,120,74,18,63,30.5,0.285,26,0 452 | 1,82,64,13,95,21.2,0.415,23,0 453 | 2,134,70,0,0,28.9,0.542,23,1 454 | 0,91,68,32,210,39.9,0.381,25,0 455 | 2,119,0,0,0,19.6,0.832,72,0 456 | 2,100,54,28,105,37.8,0.498,24,0 457 | 14,175,62,30,0,33.6,0.212,38,1 458 | 1,135,54,0,0,26.7,0.687,62,0 459 | 5,86,68,28,71,30.2,0.364,24,0 460 | 10,148,84,48,237,37.6,1.001,51,1 461 | 9,134,74,33,60,25.9,0.46,81,0 462 | 9,120,72,22,56,20.8,0.733,48,0 463 | 1,71,62,0,0,21.8,0.416,26,0 464 | 8,74,70,40,49,35.3,0.705,39,0 465 | 5,88,78,30,0,27.6,0.258,37,0 466 | 10,115,98,0,0,24,1.022,34,0 467 | 0,124,56,13,105,21.8,0.452,21,0 468 | 0,74,52,10,36,27.8,0.269,22,0 469 | 0,97,64,36,100,36.8,0.6,25,0 470 | 8,120,0,0,0,30,0.183,38,1 471 | 6,154,78,41,140,46.1,0.571,27,0 472 | 1,144,82,40,0,41.3,0.607,28,0 473 | 0,137,70,38,0,33.2,0.17,22,0 474 
| 0,119,66,27,0,38.8,0.259,22,0 475 | 7,136,90,0,0,29.9,0.21,50,0 476 | 4,114,64,0,0,28.9,0.126,24,0 477 | 0,137,84,27,0,27.3,0.231,59,0 478 | 2,105,80,45,191,33.7,0.711,29,1 479 | 7,114,76,17,110,23.8,0.466,31,0 480 | 8,126,74,38,75,25.9,0.162,39,0 481 | 4,132,86,31,0,28,0.419,63,0 482 | 3,158,70,30,328,35.5,0.344,35,1 483 | 0,123,88,37,0,35.2,0.197,29,0 484 | 4,85,58,22,49,27.8,0.306,28,0 485 | 0,84,82,31,125,38.2,0.233,23,0 486 | 0,145,0,0,0,44.2,0.63,31,1 487 | 0,135,68,42,250,42.3,0.365,24,1 488 | 1,139,62,41,480,40.7,0.536,21,0 489 | 0,173,78,32,265,46.5,1.159,58,0 490 | 4,99,72,17,0,25.6,0.294,28,0 491 | 8,194,80,0,0,26.1,0.551,67,0 492 | 2,83,65,28,66,36.8,0.629,24,0 493 | 2,89,90,30,0,33.5,0.292,42,0 494 | 4,99,68,38,0,32.8,0.145,33,0 495 | 4,125,70,18,122,28.9,1.144,45,1 496 | 3,80,0,0,0,0,0.174,22,0 497 | 6,166,74,0,0,26.6,0.304,66,0 498 | 5,110,68,0,0,26,0.292,30,0 499 | 2,81,72,15,76,30.1,0.547,25,0 500 | 7,195,70,33,145,25.1,0.163,55,1 501 | 6,154,74,32,193,29.3,0.839,39,0 502 | 2,117,90,19,71,25.2,0.313,21,0 503 | 3,84,72,32,0,37.2,0.267,28,0 504 | 6,0,68,41,0,39,0.727,41,1 505 | 7,94,64,25,79,33.3,0.738,41,0 506 | 3,96,78,39,0,37.3,0.238,40,0 507 | 10,75,82,0,0,33.3,0.263,38,0 508 | 0,180,90,26,90,36.5,0.314,35,1 509 | 1,130,60,23,170,28.6,0.692,21,0 510 | 2,84,50,23,76,30.4,0.968,21,0 511 | 8,120,78,0,0,25,0.409,64,0 512 | 12,84,72,31,0,29.7,0.297,46,1 513 | 0,139,62,17,210,22.1,0.207,21,0 514 | 9,91,68,0,0,24.2,0.2,58,0 515 | 2,91,62,0,0,27.3,0.525,22,0 516 | 3,99,54,19,86,25.6,0.154,24,0 517 | 3,163,70,18,105,31.6,0.268,28,1 518 | 9,145,88,34,165,30.3,0.771,53,1 519 | 7,125,86,0,0,37.6,0.304,51,0 520 | 13,76,60,0,0,32.8,0.18,41,0 521 | 6,129,90,7,326,19.6,0.582,60,0 522 | 2,68,70,32,66,25,0.187,25,0 523 | 3,124,80,33,130,33.2,0.305,26,0 524 | 6,114,0,0,0,0,0.189,26,0 525 | 9,130,70,0,0,34.2,0.652,45,1 526 | 3,125,58,0,0,31.6,0.151,24,0 527 | 3,87,60,18,0,21.8,0.444,21,0 528 | 1,97,64,19,82,18.2,0.299,21,0 529 | 3,116,74,15,105,26.3,0.107,24,0 530 
| 0,117,66,31,188,30.8,0.493,22,0 531 | 0,111,65,0,0,24.6,0.66,31,0 532 | 2,122,60,18,106,29.8,0.717,22,0 533 | 0,107,76,0,0,45.3,0.686,24,0 534 | 1,86,66,52,65,41.3,0.917,29,0 535 | 6,91,0,0,0,29.8,0.501,31,0 536 | 1,77,56,30,56,33.3,1.251,24,0 537 | 4,132,0,0,0,32.9,0.302,23,1 538 | 0,105,90,0,0,29.6,0.197,46,0 539 | 0,57,60,0,0,21.7,0.735,67,0 540 | 0,127,80,37,210,36.3,0.804,23,0 541 | 3,129,92,49,155,36.4,0.968,32,1 542 | 8,100,74,40,215,39.4,0.661,43,1 543 | 3,128,72,25,190,32.4,0.549,27,1 544 | 10,90,85,32,0,34.9,0.825,56,1 545 | 4,84,90,23,56,39.5,0.159,25,0 546 | 1,88,78,29,76,32,0.365,29,0 547 | 8,186,90,35,225,34.5,0.423,37,1 548 | 5,187,76,27,207,43.6,1.034,53,1 549 | 4,131,68,21,166,33.1,0.16,28,0 550 | 1,164,82,43,67,32.8,0.341,50,0 551 | 4,189,110,31,0,28.5,0.68,37,0 552 | 1,116,70,28,0,27.4,0.204,21,0 553 | 3,84,68,30,106,31.9,0.591,25,0 554 | 6,114,88,0,0,27.8,0.247,66,0 555 | 1,88,62,24,44,29.9,0.422,23,0 556 | 1,84,64,23,115,36.9,0.471,28,0 557 | 7,124,70,33,215,25.5,0.161,37,0 558 | 1,97,70,40,0,38.1,0.218,30,0 559 | 8,110,76,0,0,27.8,0.237,58,0 560 | 11,103,68,40,0,46.2,0.126,42,0 561 | 11,85,74,0,0,30.1,0.3,35,0 562 | 6,125,76,0,0,33.8,0.121,54,1 563 | 0,198,66,32,274,41.3,0.502,28,1 564 | 1,87,68,34,77,37.6,0.401,24,0 565 | 6,99,60,19,54,26.9,0.497,32,0 566 | 0,91,80,0,0,32.4,0.601,27,0 567 | 2,95,54,14,88,26.1,0.748,22,0 568 | 1,99,72,30,18,38.6,0.412,21,0 569 | 6,92,62,32,126,32,0.085,46,0 570 | 4,154,72,29,126,31.3,0.338,37,0 571 | 0,121,66,30,165,34.3,0.203,33,1 572 | 3,78,70,0,0,32.5,0.27,39,0 573 | 2,130,96,0,0,22.6,0.268,21,0 574 | 3,111,58,31,44,29.5,0.43,22,0 575 | 2,98,60,17,120,34.7,0.198,22,0 576 | 1,143,86,30,330,30.1,0.892,23,0 577 | 1,119,44,47,63,35.5,0.28,25,0 578 | 6,108,44,20,130,24,0.813,35,0 579 | 2,118,80,0,0,42.9,0.693,21,1 580 | 10,133,68,0,0,27,0.245,36,0 581 | 2,197,70,99,0,34.7,0.575,62,1 582 | 0,151,90,46,0,42.1,0.371,21,1 583 | 6,109,60,27,0,25,0.206,27,0 584 | 12,121,78,17,0,26.5,0.259,62,0 585 | 
8,100,76,0,0,38.7,0.19,42,0 586 | 8,124,76,24,600,28.7,0.687,52,1 587 | 1,93,56,11,0,22.5,0.417,22,0 588 | 8,143,66,0,0,34.9,0.129,41,1 589 | 6,103,66,0,0,24.3,0.249,29,0 590 | 3,176,86,27,156,33.3,1.154,52,1 591 | 0,73,0,0,0,21.1,0.342,25,0 592 | 11,111,84,40,0,46.8,0.925,45,1 593 | 2,112,78,50,140,39.4,0.175,24,0 594 | 3,132,80,0,0,34.4,0.402,44,1 595 | 2,82,52,22,115,28.5,1.699,25,0 596 | 6,123,72,45,230,33.6,0.733,34,0 597 | 0,188,82,14,185,32,0.682,22,1 598 | 0,67,76,0,0,45.3,0.194,46,0 599 | 1,89,24,19,25,27.8,0.559,21,0 600 | 1,173,74,0,0,36.8,0.088,38,1 601 | 1,109,38,18,120,23.1,0.407,26,0 602 | 1,108,88,19,0,27.1,0.4,24,0 603 | 6,96,0,0,0,23.7,0.19,28,0 604 | 1,124,74,36,0,27.8,0.1,30,0 605 | 7,150,78,29,126,35.2,0.692,54,1 606 | 4,183,0,0,0,28.4,0.212,36,1 607 | 1,124,60,32,0,35.8,0.514,21,0 608 | 1,181,78,42,293,40,1.258,22,1 609 | 1,92,62,25,41,19.5,0.482,25,0 610 | 0,152,82,39,272,41.5,0.27,27,0 611 | 1,111,62,13,182,24,0.138,23,0 612 | 3,106,54,21,158,30.9,0.292,24,0 613 | 3,174,58,22,194,32.9,0.593,36,1 614 | 7,168,88,42,321,38.2,0.787,40,1 615 | 6,105,80,28,0,32.5,0.878,26,0 616 | 11,138,74,26,144,36.1,0.557,50,1 617 | 3,106,72,0,0,25.8,0.207,27,0 618 | 6,117,96,0,0,28.7,0.157,30,0 619 | 2,68,62,13,15,20.1,0.257,23,0 620 | 9,112,82,24,0,28.2,1.282,50,1 621 | 0,119,0,0,0,32.4,0.141,24,1 622 | 2,112,86,42,160,38.4,0.246,28,0 623 | 2,92,76,20,0,24.2,1.698,28,0 624 | 6,183,94,0,0,40.8,1.461,45,0 625 | 0,94,70,27,115,43.5,0.347,21,0 626 | 2,108,64,0,0,30.8,0.158,21,0 627 | 4,90,88,47,54,37.7,0.362,29,0 628 | 0,125,68,0,0,24.7,0.206,21,0 629 | 0,132,78,0,0,32.4,0.393,21,0 630 | 5,128,80,0,0,34.6,0.144,45,0 631 | 4,94,65,22,0,24.7,0.148,21,0 632 | 7,114,64,0,0,27.4,0.732,34,1 633 | 0,102,78,40,90,34.5,0.238,24,0 634 | 2,111,60,0,0,26.2,0.343,23,0 635 | 1,128,82,17,183,27.5,0.115,22,0 636 | 10,92,62,0,0,25.9,0.167,31,0 637 | 13,104,72,0,0,31.2,0.465,38,1 638 | 5,104,74,0,0,28.8,0.153,48,0 639 | 2,94,76,18,66,31.6,0.649,23,0 640 | 
7,97,76,32,91,40.9,0.871,32,1 641 | 1,100,74,12,46,19.5,0.149,28,0 642 | 0,102,86,17,105,29.3,0.695,27,0 643 | 4,128,70,0,0,34.3,0.303,24,0 644 | 6,147,80,0,0,29.5,0.178,50,1 645 | 4,90,0,0,0,28,0.61,31,0 646 | 3,103,72,30,152,27.6,0.73,27,0 647 | 2,157,74,35,440,39.4,0.134,30,0 648 | 1,167,74,17,144,23.4,0.447,33,1 649 | 0,179,50,36,159,37.8,0.455,22,1 650 | 11,136,84,35,130,28.3,0.26,42,1 651 | 0,107,60,25,0,26.4,0.133,23,0 652 | 1,91,54,25,100,25.2,0.234,23,0 653 | 1,117,60,23,106,33.8,0.466,27,0 654 | 5,123,74,40,77,34.1,0.269,28,0 655 | 2,120,54,0,0,26.8,0.455,27,0 656 | 1,106,70,28,135,34.2,0.142,22,0 657 | 2,155,52,27,540,38.7,0.24,25,1 658 | 2,101,58,35,90,21.8,0.155,22,0 659 | 1,120,80,48,200,38.9,1.162,41,0 660 | 11,127,106,0,0,39,0.19,51,0 661 | 3,80,82,31,70,34.2,1.292,27,1 662 | 10,162,84,0,0,27.7,0.182,54,0 663 | 1,199,76,43,0,42.9,1.394,22,1 664 | 8,167,106,46,231,37.6,0.165,43,1 665 | 9,145,80,46,130,37.9,0.637,40,1 666 | 6,115,60,39,0,33.7,0.245,40,1 667 | 1,112,80,45,132,34.8,0.217,24,0 668 | 4,145,82,18,0,32.5,0.235,70,1 669 | 10,111,70,27,0,27.5,0.141,40,1 670 | 6,98,58,33,190,34,0.43,43,0 671 | 9,154,78,30,100,30.9,0.164,45,0 672 | 6,165,68,26,168,33.6,0.631,49,0 673 | 1,99,58,10,0,25.4,0.551,21,0 674 | 10,68,106,23,49,35.5,0.285,47,0 675 | 3,123,100,35,240,57.3,0.88,22,0 676 | 8,91,82,0,0,35.6,0.587,68,0 677 | 6,195,70,0,0,30.9,0.328,31,1 678 | 9,156,86,0,0,24.8,0.23,53,1 679 | 0,93,60,0,0,35.3,0.263,25,0 680 | 3,121,52,0,0,36,0.127,25,1 681 | 2,101,58,17,265,24.2,0.614,23,0 682 | 2,56,56,28,45,24.2,0.332,22,0 683 | 0,162,76,36,0,49.6,0.364,26,1 684 | 0,95,64,39,105,44.6,0.366,22,0 685 | 4,125,80,0,0,32.3,0.536,27,1 686 | 5,136,82,0,0,0,0.64,69,0 687 | 2,129,74,26,205,33.2,0.591,25,0 688 | 3,130,64,0,0,23.1,0.314,22,0 689 | 1,107,50,19,0,28.3,0.181,29,0 690 | 1,140,74,26,180,24.1,0.828,23,0 691 | 1,144,82,46,180,46.1,0.335,46,1 692 | 8,107,80,0,0,24.6,0.856,34,0 693 | 13,158,114,0,0,42.3,0.257,44,1 694 | 2,121,70,32,95,39.1,0.886,23,0 695 | 
7,129,68,49,125,38.5,0.439,43,1 696 | 2,90,60,0,0,23.5,0.191,25,0 697 | 7,142,90,24,480,30.4,0.128,43,1 698 | 3,169,74,19,125,29.9,0.268,31,1 699 | 0,99,0,0,0,25,0.253,22,0 700 | 4,127,88,11,155,34.5,0.598,28,0 701 | 4,118,70,0,0,44.5,0.904,26,0 702 | 2,122,76,27,200,35.9,0.483,26,0 703 | 6,125,78,31,0,27.6,0.565,49,1 704 | 1,168,88,29,0,35,0.905,52,1 705 | 2,129,0,0,0,38.5,0.304,41,0 706 | 4,110,76,20,100,28.4,0.118,27,0 707 | 6,80,80,36,0,39.8,0.177,28,0 708 | 10,115,0,0,0,0,0.261,30,1 709 | 2,127,46,21,335,34.4,0.176,22,0 710 | 9,164,78,0,0,32.8,0.148,45,1 711 | 2,93,64,32,160,38,0.674,23,1 712 | 3,158,64,13,387,31.2,0.295,24,0 713 | 5,126,78,27,22,29.6,0.439,40,0 714 | 10,129,62,36,0,41.2,0.441,38,1 715 | 0,134,58,20,291,26.4,0.352,21,0 716 | 3,102,74,0,0,29.5,0.121,32,0 717 | 7,187,50,33,392,33.9,0.826,34,1 718 | 3,173,78,39,185,33.8,0.97,31,1 719 | 10,94,72,18,0,23.1,0.595,56,0 720 | 1,108,60,46,178,35.5,0.415,24,0 721 | 5,97,76,27,0,35.6,0.378,52,1 722 | 4,83,86,19,0,29.3,0.317,34,0 723 | 1,114,66,36,200,38.1,0.289,21,0 724 | 1,149,68,29,127,29.3,0.349,42,1 725 | 5,117,86,30,105,39.1,0.251,42,0 726 | 1,111,94,0,0,32.8,0.265,45,0 727 | 4,112,78,40,0,39.4,0.236,38,0 728 | 1,116,78,29,180,36.1,0.496,25,0 729 | 0,141,84,26,0,32.4,0.433,22,0 730 | 2,175,88,0,0,22.9,0.326,22,0 731 | 2,92,52,0,0,30.1,0.141,22,0 732 | 3,130,78,23,79,28.4,0.323,34,1 733 | 8,120,86,0,0,28.4,0.259,22,1 734 | 2,174,88,37,120,44.5,0.646,24,1 735 | 2,106,56,27,165,29,0.426,22,0 736 | 2,105,75,0,0,23.3,0.56,53,0 737 | 4,95,60,32,0,35.4,0.284,28,0 738 | 0,126,86,27,120,27.4,0.515,21,0 739 | 8,65,72,23,0,32,0.6,42,0 740 | 2,99,60,17,160,36.6,0.453,21,0 741 | 1,102,74,0,0,39.5,0.293,42,1 742 | 11,120,80,37,150,42.3,0.785,48,1 743 | 3,102,44,20,94,30.8,0.4,26,0 744 | 1,109,58,18,116,28.5,0.219,22,0 745 | 9,140,94,0,0,32.7,0.734,45,1 746 | 13,153,88,37,140,40.6,1.174,39,0 747 | 12,100,84,33,105,30,0.488,46,0 748 | 1,147,94,41,0,49.3,0.358,27,1 749 | 1,81,74,41,57,46.3,1.096,32,0 750 | 
3,187,70,22,200,36.4,0.408,36,1 751 | 6,162,62,0,0,24.3,0.178,50,1 752 | 4,136,70,0,0,31.2,1.182,22,1 753 | 1,121,78,39,74,39,0.261,28,0 754 | 3,108,62,24,0,26,0.223,25,0 755 | 0,181,88,44,510,43.3,0.222,26,1 756 | 8,154,78,32,0,32.4,0.443,45,1 757 | 1,128,88,39,110,36.5,1.057,37,1 758 | 7,137,90,41,0,32,0.391,39,0 759 | 0,123,72,0,0,36.3,0.258,52,1 760 | 1,106,76,0,0,37.5,0.197,26,0 761 | 6,190,92,0,0,35.5,0.278,66,1 762 | 2,88,58,26,16,28.4,0.766,22,0 763 | 9,170,74,31,0,44,0.403,43,1 764 | 9,89,62,0,0,22.5,0.142,33,0 765 | 10,101,76,48,180,32.9,0.171,63,0 766 | 2,122,70,27,0,36.8,0.34,27,0 767 | 5,121,72,23,112,26.2,0.245,30,0 768 | 1,126,60,0,0,30.1,0.349,47,1 769 | 1,93,70,31,0,30.4,0.315,23,0 770 | -------------------------------------------------------------------------------- /module_2/01_din_sql.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Advanced Prompting for Text-to-SQL: DIN-SQL\n", 8 | "Use of advanced prompting techniques to convert a natural language question to SQL" 9 | ] 10 | }, 11 | { 12 | "cell_type": "markdown", 13 | "metadata": {}, 14 | "source": [ 15 | "---" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "## Suggested SageMaker Environment\n", 23 | "SageMaker Image: sagemaker-distribution-cpu\n", 24 | "\n", 25 | "Kernel: Python 3\n", 26 | "\n", 27 | "Instance Type: ml.m5.large" 28 | ] 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "metadata": {}, 33 | "source": [ 34 | "---" 35 | ] 36 | }, 37 | { 38 | "cell_type": "markdown", 39 | "metadata": {}, 40 | "source": [ 41 | "## Contents\n", 42 | "\n", 43 | "1. [Install Dependencies](#step-1-install-dependencies)\n", 44 | "1. [Set up Athena Connection](#step-2-set-up-a-connection-to-the-tpc-ds-data-set-in-athena)\n", 45 | "1. [Schema Linking](#step-3-determine-schema-links)\n", 46 | "1. 
[Classify Query Complexity](#step-4-classify-sql-complexity)\n", 47 | "1. [Generate SQL Query](#step-5-generate-sql-query)\n", 48 | "1. [Execute SQL Query](#step-6-execute-query)\n", 49 | "1. [Validate Results](#step-7-validate-results)\n", 50 | "1. [Self-Correction](#step-8-self-correction)\n", 51 | "1. [Experiment](#step-9-experiment)\n", 52 | "1. [Citation](#citation)" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "metadata": {}, 58 | "source": [ 59 | "---" 60 | ] 61 | }, 62 | { 63 | "cell_type": "markdown", 64 | "metadata": {}, 65 | "source": [ 66 | "## Objective\n", 67 | "This notebook provides code snippets that assist with implementing one approach to converting a natural language question into a SQL query that would answer it." 68 | ] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "metadata": {}, 73 | "source": [ 74 | "---" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "## The Approach to the Text-to-SQL Problem\n", 82 | "We'll implement the DIN-SQL prompting strategy to break a question down into smaller parts, get an understanding of the query complexity, and ultimately create a valid SQL statement. As shown below, this process consists of four main prompting steps:\n", 83 | "\n", 84 | "1. Schema Linking\n", 85 | "2. Classification and decomposition\n", 86 | "3. SQL code generation\n", 87 | "4. 
Self-correction\n", 88 | "\n", 89 | "For a deeper dive into the methodology and findings about this approach, please read the full paper here: https://arxiv.org/pdf/2304.11015.pdf" 90 | ] 91 | }, 92 | { 93 | "cell_type": "markdown", 94 | "metadata": {}, 95 | "source": [ 96 | "![Alt text](content/din_sql_methodology.png)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "### Tools\n", 104 | "SQLAlchemy, Anthropic, Amazon Bedrock SDK (Boto3), PyAthena, Jinja2" 105 | ] 106 | }, 107 | { 108 | "cell_type": "markdown", 109 | "metadata": {}, 110 | "source": [ 111 | "---" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "metadata": {}, 117 | "source": [ 118 | "### Step 1: Install Dependencies" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "metadata": {}, 124 | "source": [ 125 | "Here we will install all the required dependencies to run this notebook. **You can ignore the following errors** that may arise due to dependency conflicts for libraries we won't be using in this module:\n", 126 | "```\n", 127 | "ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", 128 | "dash 2.14.1 requires dash-core-components==2.0.0, which is not installed.\n", 129 | "dash 2.14.1 requires dash-html-components==2.0.0, which is not installed.\n", 130 | "dash 2.14.1 requires dash-table==5.0.0, which is not installed.\n", 131 | "jupyter-ai 2.5.0 requires faiss-cpu, which is not installed.\n", 132 | "amazon-sagemaker-jupyter-scheduler 3.0.4 requires pydantic==1.*, but you have pydantic 2.6.0 which is incompatible.\n", 133 | "gluonts 0.13.7 requires pydantic~=1.7, but you have pydantic 2.6.0 which is incompatible.\n", 134 | "jupyter-ai 2.5.0 requires pydantic~=1.0, but you have pydantic 2.6.0 which is incompatible.\n", 135 | "jupyter-ai-magics 2.5.0 requires pydantic~=1.0, but you have pydantic 2.6.0 which is incompatible.\n", 136 | "jupyter-scheduler 2.3.0 requires pydantic~=1.10, but you have pydantic 2.6.0 which is incompatible.\n", 137 | "sparkmagic 0.21.0 requires pandas<2.0.0,>=0.17.1, but you have pandas 2.1.2 which is incompatible.\n", 138 | "tensorflow 2.12.1 requires typing-extensions<4.6.0,>=3.6.6, but you have typing-extensions 4.9.0 which is incompatible.\n", 139 | "```\n" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": null, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "!python -m ensurepip --upgrade\n", 149 | "%pip install -qU sqlalchemy\n", 150 | "%pip install -q \"boto3~=1.34\"\n", 151 | "%pip install -qU jinja2\n", 152 | "%pip install -qU botocore\n", 153 | "%pip install -qU pandas\n", 154 | "%pip install -qU PyAthena\n", 155 | "%pip install -qU faiss-cpu" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "metadata": {}, 161 | "source": [ 162 | "Import the `din_sql` library to assist with using the prompts written in the paper. Note that we've leveraged Jinja for our prompt templating." 
163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "import sys\n", 172 | "\n", 173 | "import boto3\n", 174 | "import pandas as pd\n", 175 | "\n", 176 | "sys.path.append('../')\n", 177 | "from libs.din_sql import din_sql_lib as dsl\n", 178 | "import utilities as u" 179 | ] 180 | }, 181 | { 182 | "cell_type": "markdown", 183 | "metadata": {}, 184 | "source": [ 185 | "### Step 2: Set up a connection to the TPC-DS data set in Athena" 186 | ] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "Initialize the following variables with details relative to your account, and how you set up the Athena data source connector for the TPC-DS dataset. You can find these in the CloudFormation outputs." 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [ 201 | "ATHENA_RESULTS_S3_LOCATION, ATHENA_CATALOG_NAME = \\\n", 202 | " u.extract_CF_outputs(\"AthenaResultsS3Location\", \"AthenaCatalogName\")\n", 203 | "# ATHENA_RESULTS_S3_LOCATION = \"\" # available in cloudformation outputs\n", 204 | "# ATHENA_CATALOG_NAME = \"\" # available in cloudformation outputs\n", 205 | "# ATHENA_RESULTS_S3_BUCKET = u.extract_s3_bucket(ATHENA_RESULTS_S3_LOCATION)\n", 206 | "DB_NAME = \"tpcds1\"\n", 207 | "ATHENA_RESULTS_S3_LOCATION, ATHENA_CATALOG_NAME, DB_NAME" 208 | ] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "metadata": {}, 213 | "source": [ 214 | "Instantiate the `din_sql` class with the Bedrock model of your choice. In this module, the prompts are tailored specifically to work well with Claude V2, so we'll be using that."
215 | ] 216 | }, 217 | { 218 | "cell_type": "code", 219 | "execution_count": null, 220 | "metadata": {}, 221 | "outputs": [], 222 | "source": [ 223 | "din_sql = dsl.DIN_SQL(bedrock_model_id='anthropic.claude-v2')" 224 | ] 225 | }, 226 | { 227 | "cell_type": "markdown", 228 | "metadata": {}, 229 | "source": [ 230 | "Create a connection to Athena using the information entered above. We'll use this connection to test our generated SQL. It's also used to augment prompts in DIN-SQL." 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "metadata": {}, 237 | "outputs": [], 238 | "source": [ 239 | "din_sql.athena_connect(catalog_name=ATHENA_CATALOG_NAME, \n", 240 | " db_name=DB_NAME, \n", 241 | " s3_prefix=ATHENA_RESULTS_S3_LOCATION)" 242 | ] 243 | }, 244 | { 245 | "cell_type": "markdown", 246 | "metadata": {}, 247 | "source": [ 248 | "### Step 3: Determine Schema Links " 249 | ] 250 | }, 251 | { 252 | "cell_type": "markdown", 253 | "metadata": {}, 254 | "source": [ 255 | "The first step in the DIN-SQL process is to find out which foreign key relationships are required in order to answer the question. Let's take a look at how the prompt for this task is designed."
256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "metadata": {}, 262 | "outputs": [], 263 | "source": [ 264 | "!head ../libs/din_sql/prompt_templates/schema_linking_prompt.txt.jinja" 265 | ] 266 | }, 267 | { 268 | "cell_type": "code", 269 | "execution_count": null, 270 | "metadata": {}, 271 | "outputs": [], 272 | "source": [ 273 | "return_sql = din_sql.find_fields(db_name=DB_NAME)\n", 274 | "print(return_sql)" 275 | ] 276 | }, 277 | { 278 | "cell_type": "markdown", 279 | "metadata": {}, 280 | "source": [ 281 | "If you take a look at the prompt template, you can see we're using some [Anthropic Prompting best practices](https://docs.anthropic.com/claude/docs/introduction-to-prompt-design) to improve results when working with Claude:\n", 282 | "1. [Mark different parts of the prompt](https://docs.anthropic.com/claude/docs/constructing-a-prompt#mark-different-parts-of-the-prompt) using XML tags. In our example, we use XML tags and ```sql to organize our output.\n", 283 | "2. [We use many examples](https://docs.anthropic.com/claude/docs/constructing-a-prompt#examples-optional). This prompt technique uses a many-shot method by offering Claude a lot of examples.\n", 284 | "3. [We ask Claude to think step-by-step](https://docs.anthropic.com/claude/docs/ask-claude-to-think-step-by-step)\n", 285 | "4. We use [Roleplay Dialogue](https://docs.anthropic.com/claude/docs/roleplay-dialogue) to help Claude act the part of a relational database expert.\n", 286 | "\n", 287 | "Let's see how our prompt will look by passing in a question and database name to the `schema_linking_prompt_maker` method. Note the use of tags."
288 | ] 289 | }, 290 | { 291 | "cell_type": "code", 292 | "execution_count": null, 293 | "metadata": {}, 294 | "outputs": [], 295 | "source": [ 296 | "question = \"Which customer spent the most money in the web store?\"\n", 297 | "\n", 298 | "schema_links_prompt = din_sql.schema_linking_prompt_maker(question, DB_NAME)\n", 299 | "print(schema_links_prompt)" 300 | ] 301 | }, 302 | { 303 | "cell_type": "markdown", 304 | "metadata": {}, 305 | "source": [ 306 | "Before we make our inference for schema links, let's [put words in Claude's mouth](https://docs.anthropic.com/claude/reference/migrating-from-text-completions-to-messages#putting-words-in-claudes-mouth) by providing the beginning of the assistant's answer and leveraging the `word_in_mouth` parameter of our `llm_generation` method." 307 | ] 308 | }, 309 | { 310 | "cell_type": "code", 311 | "execution_count": null, 312 | "metadata": {}, 313 | "outputs": [], 314 | "source": [ 315 | "word_in_mouth_schema_link = f'A. Let’s think step by step. In the question \"{question}\", we are asked:'" 316 | ] 317 | }, 318 | { 319 | "cell_type": "markdown", 320 | "metadata": {}, 321 | "source": [ 322 | "Now that we have our schema link prompt, let's see what Claude comes up with. " 323 | ] 324 | }, 325 | { 326 | "cell_type": "code", 327 | "execution_count": null, 328 | "metadata": {}, 329 | "outputs": [], 330 | "source": [ 331 | "schema_links = din_sql.llm_generation(\n", 332 | " schema_links_prompt,\n", 333 | " stop_sequences=[''],\n", 334 | " word_in_mouth=word_in_mouth_schema_link\n", 335 | " )\n", 336 | "print(f\"{word_in_mouth_schema_link}{schema_links}\")" 337 | ] 338 | }, 339 | { 340 | "cell_type": "markdown", 341 | "metadata": {}, 342 | "source": [ 343 | "As you can see, Claude reasoned its way through identifying the foreign key relationships between tables. This is because we gave it a list of tables and their columns for Claude to inspect.
Let's use those `<links>` tags to clean up our response, and store this list for our next step in the DIN-SQL method." 344 | ] 345 | }, 346 | { 347 | "cell_type": "code", 348 | "execution_count": null, 349 | "metadata": {}, 350 | "outputs": [], 351 | "source": [ 352 | "links = u.extract_tag(schema_links+\"\", \"links\")[0].strip()\n", 353 | "links" 354 | ] 355 | }, 356 | { 357 | "cell_type": "markdown", 358 | "metadata": {}, 359 | "source": [ 360 | "### Step 4: Classify SQL Complexity" 361 | ] 362 | }, 363 | { 364 | "cell_type": "markdown", 365 | "metadata": {}, 366 | "source": [ 367 | "The next step in the process is to classify the complexity of the SQL that will be required to answer the question. Let's take a look at the prompt." 368 | ] 369 | }, 370 | { 371 | "cell_type": "code", 372 | "execution_count": null, 373 | "metadata": {}, 374 | "outputs": [], 375 | "source": [ 376 | "!head ../libs/din_sql/prompt_templates/classification_prompt.txt.jinja" 377 | ] 378 | }, 379 | { 380 | "cell_type": "markdown", 381 | "metadata": {}, 382 | "source": [ 383 | "Here we're giving Claude a decision-making framework for determining the class of the query required to answer the question. This is done by offering simple if/then logic.\n", 384 | "\n", 385 | "Feel free to take a closer look at how this prompt uses examples of each class to teach Claude how to make decisions. Once complete, go ahead and send your prompt to Claude to classify the complexity of this query."
386 | ] 387 | }, 388 | { 389 | "cell_type": "code", 390 | "execution_count": null, 391 | "metadata": {}, 392 | "outputs": [], 393 | "source": [ 394 | "word_in_mouth_classify = \"A: Let’s think step by step.\"\n", 395 | "classification = din_sql.llm_generation(\n", 396 | " prompt=din_sql.classification_prompt_maker(question, DB_NAME, links),\n", 397 | " word_in_mouth=word_in_mouth_classify\n", 398 | " )\n", 399 | "print(f\"{word_in_mouth_classify}{classification}\")" 400 | ] 401 | }, 402 | { 403 | "cell_type": "markdown", 404 | "metadata": {}, 405 | "source": [ 406 | "You can see that Claude is taking advantage of the room we gave it to think about the decision. Let's parse the result using the `