├── .github └── workflows │ ├── pr.yml │ └── release.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── LICENSE_HEADER.tpl ├── Makefile ├── README.md ├── add_license_headers.sh ├── camel_database_agent ├── __init__.py ├── cli.py ├── core │ ├── __init__.py │ ├── exceptions.py │ └── method_lru_cache.py ├── database │ ├── __init__.py │ ├── dialect │ │ ├── __init__.py │ │ ├── dialect.py │ │ ├── dialect_mysql.py │ │ ├── dialect_postgresql.py │ │ └── dialect_sqlite.py │ ├── manager.py │ ├── prompts.py │ └── schema.py ├── database_agent.py ├── database_base.py ├── database_prompt.py ├── datagen │ ├── __init__.py │ ├── pipeline.py │ └── prompts.py ├── knowledge │ ├── __init__.py │ ├── knowledge.py │ └── knowledge_qdrant.py └── py.typed ├── conftest.py ├── database ├── mysql │ ├── 1_ddl_sql.sql │ └── 2_data_sql.sql ├── postgresql │ ├── 1_ddl_sql.sql │ └── 2_data_sql.sql └── sqlite │ ├── music.sqlite │ └── school_scheduling.sqlite ├── docs ├── demo_video.png ├── screenshot-music-database.png ├── screenshot-question-1.png ├── screenshot-question-2.png ├── screenshot-question-3.png ├── screenshot-question-4.png ├── screenshot-question-5.png ├── screenshot-question-6.png ├── screenshot-question-7.png ├── screenshot-question-chinese.png ├── screenshot-question-korean.png └── screenshot-school-scheduling-database.png ├── example.py ├── pyproject.toml ├── scripts ├── check_imports.py └── lint_imports.sh ├── spider2_lite ├── README.md ├── database │ └── README.md ├── spider2-lite-back.jsonl ├── spider2-lite.jsonl └── spider2_run.py ├── tests ├── __init__.py ├── integration_tests │ ├── __init__.py │ ├── data.sql │ ├── ddl.sql │ ├── query.md │ ├── test_database_agent.py │ ├── test_database_knowledge.py │ ├── test_database_manager.py │ ├── test_database_schema_dialect.py │ ├── test_database_schema_parse.py │ └── test_sql_query_inference_pipeline.py └── unit_tests │ └── __init__.py └── uv.lock /.github/workflows/pr.yml: -------------------------------------------------------------------------------- 1 | name: Python application 2 | 3 | on: 4 | push: 5 | branches: ["main"] 6 | pull_request: 7 | branches: ["main"] 8 | 9 | permissions: 10 | contents: read 11 | 12 | jobs: 13 | lint-and-test: 14 | runs-on: ubuntu-latest 15 | env: 16 | OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} 17 | steps: 18 | - uses: actions/checkout@v4 19 | 20 | - name: Set up Python 3.10 21 | uses: actions/setup-python@v4 22 | with: 23 | python-version: "3.10" 24 | cache: 'pip' 25 | 26 | - name: Cache virtual environment 27 | uses: actions/cache@v3 28 | with: 29 | path: .venv 30 | key: venv-${{ runner.os }}-python-${{ hashFiles('pyproject.toml') }}-v2 31 | restore-keys: | 32 | venv-${{ runner.os }}-python-v2 33 | 34 | - name: Install dependencies 35 | run: | 36 | python -m pip install --upgrade pip 37 | pip install uv ruff mypy 38 | uv venv .venv --python=3.10 39 | uv sync --all-extras 40 | 41 | - name: Lint 42 | run: | 43 | source .venv/bin/activate 44 | make lint 45 | 46 | - name: Run tests 47 | run: | 48 | source .venv/bin/activate 49 | make tests -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | permissions: 8 | contents: read 9 | 10 | jobs: 11 | deploy: 12 | 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v3 17 | - name: Set up Python 18 | uses: actions/setup-python@v3 19 | with: 20 | 
python-version: '3.10' 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip 24 | pip install build 25 | - name: Build package 26 | run: python -m build 27 | - name: Publish package 28 | uses: pypa/gh-action-pypi-publish@release/v1 29 | with: 30 | user: __token__ 31 | password: ${{ secrets.PYPI_API_TOKEN }} 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | .env_* 3 | .idea 4 | __pycache__ 5 | .DS_Store 6 | .mypy_cache 7 | .mypy_cache_* 8 | .venv 9 | 10 | spider2_lite/database/local_sqlite 11 | spider2_lite/workspace 12 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/astral-sh/ruff-pre-commit 3 | rev: 'v0.7.4' 4 | hooks: 5 | - id: ruff 6 | args: [--fix, --exit-non-zero-on-fix, --show-fixes] 7 | types: [python] 8 | - id: ruff-format 9 | types: [python] 10 | - repo: local 11 | hooks: 12 | - id: mypy 13 | name: Check mypy 14 | entry: mypy --namespace-packages -p camel_database_agent -p tests 15 | language: python 16 | types: [python] 17 | pass_filenames: false 18 | require_serial: true 19 | always_run: true 20 | verbose: true -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. -------------------------------------------------------------------------------- /LICENSE_HEADER.tpl: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Lei Zhang 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all format lint test tests integration_tests docker_tests help extended_tests 2 | 3 | # Default target executed when no arguments are given to make. 
4 | all: help 5 | 6 | ###################### 7 | # LINTING AND FORMATTING 8 | ###################### 9 | 10 | # Define a variable for Python and notebook files. 11 | PYTHON_FILES=. 12 | MYPY_CACHE=.mypy_cache 13 | lint format: PYTHON_FILES=. 14 | lint_package: PYTHON_FILES=camel_database_agent 15 | lint_tests: PYTHON_FILES=tests 16 | lint_tests: MYPY_CACHE=.mypy_cache_test 17 | 18 | lint lint_diff lint_package lint_tests: 19 | ruff -V 20 | mypy -V 21 | [ "$(PYTHON_FILES)" = "" ] || ruff check $(PYTHON_FILES) 22 | [ "$(PYTHON_FILES)" = "" ] || ruff format $(PYTHON_FILES) --diff 23 | [ "$(PYTHON_FILES)" = "" ] || mkdir -p $(MYPY_CACHE) && mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE) 24 | 25 | format format_diff: 26 | [ "$(PYTHON_FILES)" = "" ] || ruff format $(PYTHON_FILES) 27 | [ "$(PYTHON_FILES)" = "" ] || ruff check --select I --fix $(PYTHON_FILES) 28 | 29 | spell_check: 30 | codespell --toml pyproject.toml 31 | 32 | spell_fix: 33 | codespell --toml pyproject.toml -w 34 | 35 | check_imports: $(shell find camel_database_agent -name '*.py') 36 | python ./scripts/check_imports.py $^ 37 | 38 | test tests: 39 | pytest tests 40 | 41 | ###################### 42 | # HELP 43 | ###################### 44 | 45 | help: 46 | @echo '----' 47 | @echo 'check_imports - check imports' 48 | @echo 'format - run code formatters' 49 | @echo 'lint - run linters' 50 | @echo 'tests - run unit tests' -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CAMEL DatabaseAgent 2 | 3 | [![License](https://img.shields.io/badge/License-Apache_2.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) 4 | [![PyPi version](https://img.shields.io/pypi/v/camel-database-agent.svg)](https://pypi.org/project/camel-database-agent/) 5 | [![build](https://github.com/coolbeevip/camel-database-agent/actions/workflows/pr.yml/badge.svg)](https://github.com/coolbeevip/camel-database-agent/actions/workflows/pr.yml) 6 | [![](https://img.shields.io/pypi/dm/camel-database-agent)](https://pypi.org/project/camel-database-agent/) 7 | 8 | An open-source toolkit helping developers build natural language database query solutions based on [CAMEL-AI](https://github.com/camel-ai/camel). 9 | 10 | ## Core Components 11 | 12 | - **DataQueryInferencePipeline**: A pipeline that transforms database schema and sample data into query few-shot examples (questions and corresponding SQL) 13 | - **DatabaseKnowledge**: A vector database storing database schema, sample data, and query few-shot examples 14 | - **DatabaseAgent**: An intelligent agent based on the CAMEL framework that utilizes DatabaseKnowledge to answer user questions 15 | 16 | Features: 17 | 18 | - [x] Read-Only mode 19 | - [x] SQLite 20 | - [x] MySQL 21 | - [x] PostgreSQL 22 | - [ ] Spider 2.0-Lite evaluation (planned) 23 | 24 | ## Quick Start 25 | 26 | Clone the repository and install the dependencies. 27 | 28 | ```shell 29 | git clone git@github.com:coolbeevip/camel-database-agent.git 30 | cd camel-database-agent 31 | pip install uv ruff mypy 32 | uv venv .venv --python=3.10 33 | source .venv/bin/activate 34 | uv sync --all-extras 35 | ``` 36 | 37 | #### Music Database 38 | 39 | > This database serves as a comprehensive data model for a digital music distribution platform, encompassing various aspects of artist management, customer interactions, and sales transactions. 40 | 41 | Connect to the `database/sqlite/music.sqlite` database and use the `openai` API to answer questions.
42 | 43 | **NOTE: The first connection will take a few minutes to generate knowledge data.** 44 | 45 | ```shell 46 | source .venv/bin/activate 47 | export OPENAI_API_KEY=sk-xxx 48 | export OPENAI_API_BASE_URL=https://api.openai.com/v1/ 49 | export MODEL_NAME=gpt-4o-mini 50 | export EMBED_MODEL_NAME=text-embedding-ada-002 51 | python camel_database_agent/cli.py \ 52 | --database-url sqlite:///database/sqlite/music.sqlite 53 | ``` 54 | ![](docs/screenshot-music-database.png) 55 | 56 | #### School Scheduling Database 57 | 58 | > This database serves as a comprehensive data model for an educational institution, encompassing various aspects of student, faculty, and course management. It includes modules for building management, staff and faculty details, student information, course offerings, and class scheduling. 59 | 60 | Connect to the `database/sqlite/school_scheduling.sqlite` database and use the `openai` API to answer questions in Chinese. 61 | 62 | ```shell 63 | source .venv/bin/activate 64 | export OPENAI_API_KEY=sk-xxx 65 | export OPENAI_API_BASE_URL=https://api.openai.com/v1/ 66 | python camel_database_agent/cli.py \ 67 | --database-url sqlite:///database/sqlite/school_scheduling.sqlite \ 68 | --language Chinese 69 | ``` 70 | 71 | ![](docs/screenshot-school-scheduling-database.png) 72 | 73 | ## Demo Video 74 | 75 | [![CAMEL DatabaseAgent Demo](docs/demo_video.png)](https://youtu.be/Fl065DB8Wqo "Watch the CAMEL DatabaseAgent Demo") 76 | 77 | ## Command Line Options 78 | 79 | > usage: cli.py [-h] --database-url DATABASE_URL [--openai-api-key OPENAI_API_KEY] [--openai-api-base-url OPENAI_API_BASE_URL] [--model-name MODEL_NAME] [--embedd-model-name EMBEDD_MODEL_NAME] [--reset-train] [--read-only] [--language LANGUAGE] [--timeout TIMEOUT] 80 | 81 | * database-url: The database [URLs](https://docs.sqlalchemy.org/en/20/core/engines.html#database-urls) to connect to. 82 | * openai-api-key: The OpenAI API key. 83 | * openai-api-base-url: The OpenAI API base URL (default is https://api.openai.com/v1). 84 | * reset-train: Reset the training data. 85 | * read-only: Read-only mode. 86 | * language: Language used to generate training data. * model-name: The chat model name (default is gpt-4o-mini). * embedd-model-name: The embedding model name (default is text-embedding-ada-002). * timeout: Timeout in seconds for API calls (default is 1800). 87 | 88 | ## MySQL 89 | 90 | Start a MySQL container with the following command: 91 | 92 | ```shell 93 | docker run -d \ 94 | --name camel_mysql \ 95 | -e MYSQL_ROOT_PASSWORD=123456 \ 96 | -e MYSQL_DATABASE=school_scheduling \ 97 | -e MYSQL_USER=camel \ 98 | -e MYSQL_PASSWORD=123456 \ 99 | -p 3306:3306 \ 100 | -v $(pwd)/database/mysql:/docker-entrypoint-initdb.d \ 101 | mysql:9 102 | ``` 103 | 104 | Connect to the MySQL database to answer questions. 105 | 106 | ```shell 107 | python camel_database_agent/cli.py \ 108 | --database-url mysql+pymysql://camel:123456@127.0.0.1:3306/school_scheduling 109 | ``` 110 | 111 | ## PostgreSQL 112 | 113 | Start a PostgreSQL container with the following command: 114 | 115 | ```shell 116 | docker run -d \ 117 | --name camel_postgresql \ 118 | -e POSTGRES_USER=camel \ 119 | -e POSTGRES_PASSWORD=123456 \ 120 | -e POSTGRES_DB=school_scheduling \ 121 | -p 5432:5432 \ 122 | -v $(pwd)/database/postgresql:/docker-entrypoint-initdb.d \ 123 | postgres:17 124 | ``` 125 | 126 | Connect to the PostgreSQL database to answer questions.
127 | 128 | ```shell 129 | python camel_database_agent/cli.py \ 130 | --database-url postgresql://camel:123456@localhost:5432/school_scheduling 131 | ``` 132 | 133 | ## Developer Integration 134 | 135 | ```python 136 | import logging 137 | import os 138 | import sys 139 | import uuid 140 | 141 | import pandas as pd 142 | from camel.embeddings import OpenAIEmbedding 143 | from camel.models import ModelFactory 144 | from camel.types import ModelPlatformType, ModelType 145 | from colorama import Fore 146 | from tabulate import tabulate 147 | 148 | from camel_database_agent import DatabaseAgent 149 | from camel_database_agent.database.manager import DatabaseManager 150 | from camel_database_agent.database_base import TrainLevel 151 | 152 | # Configure logging settings to show errors on stdout 153 | logging.basicConfig( 154 | level=logging.ERROR, 155 | format="%(message)s", 156 | handlers=[logging.StreamHandler(sys.stdout)], 157 | force=True, 158 | ) 159 | # Set specific logging level for the application module 160 | logging.getLogger("camel_database_agent").setLevel(logging.INFO) 161 | logger = logging.getLogger(__name__) 162 | 163 | # Configure pandas display options to show complete data 164 | pd.set_option("display.max_rows", None) # Show all rows 165 | pd.set_option("display.max_columns", None) # Show all columns 166 | pd.set_option("display.width", None) # Auto-detect display width 167 | pd.set_option("display.max_colwidth", None) # Show full content of each cell 168 | 169 | # Define database connection string 170 | database_url = "sqlite:///database/sqlite/music.sqlite" 171 | 172 | # Initialize the database agent with required components 173 | database_agent = DatabaseAgent( 174 | interactive_mode=True, 175 | database_manager=DatabaseManager(db_url=database_url), 176 | # Configure LLM model 177 | model=ModelFactory.create( 178 | model_platform=ModelPlatformType.OPENAI, 179 | model_type=ModelType.GPT_4O_MINI, 180 | api_key=os.getenv("OPENAI_API_KEY"), 181 | url=os.getenv("OPENAI_API_BASE_URL"), 182 | ), 183 | # Configure embedding model 184 | embedding_model=OpenAIEmbedding( 185 | api_key=os.getenv("OPENAI_API_KEY"), 186 | url=os.getenv("OPENAI_API_BASE_URL"), 187 | ) 188 | ) 189 | 190 | # Train agent's knowledge about the database schema 191 | database_agent.train_knowledge( 192 | # Training level for database knowledge extraction 193 | # MEDIUM level: Balances training time and knowledge depth by: 194 | # - Analyzing schema relationships 195 | # - Extracting representative sample data 196 | # - Generating a moderate number of query examples 197 | level=TrainLevel.MEDIUM, 198 | # Whether to retrain the knowledge base from scratch 199 | # If True: Forces regeneration of all database insights and examples 200 | # If False: Uses existing cached knowledge if available 201 | reset_train=False, 202 | ) 203 | 204 | # Display database overview information 205 | print(f"{Fore.GREEN}Database Overview") 206 | print("=" * 50) 207 | print(f"{database_agent.get_summary()}\n\n{Fore.RESET}") 208 | 209 | # Display recommended example questions 210 | print(f"{Fore.GREEN}Recommendation Question") 211 | print("=" * 50) 212 | print(f"{database_agent.get_recommendation_question()}\n\n{Fore.RESET}") 213 | 214 | # Execute a sample query using natural language 215 | response = database_agent.ask(session_id=str(uuid.uuid4()), 216 | question="List all playlists with more than 5 tracks") 217 | 218 | # Handle and display the query results 219 | if response.success: 220 | if response.dataset is not None: 221 | # 
Format successful results as a table 222 | data = tabulate( 223 | tabular_data=response.dataset, headers='keys', tablefmt='psql' 224 | ) 225 | print(f"{Fore.GREEN}{data}{Fore.RESET}") 226 | else: 227 | print(f"{Fore.GREEN}No results found.{Fore.RESET}") 228 | # Display the SQL that was generated 229 | print(f"{Fore.YELLOW}{response.sql}{Fore.RESET}") 230 | else: 231 | # Display error message if query failed 232 | print(f"{Fore.RED}+ {response.error}{Fore.RESET}") 233 | ``` 234 | 235 | Output 236 | 237 | ```shell 238 | $ python example.py 239 | Successfully connected to database: sqlite:///database/sqlite/music.sqlite 240 | Workspace: /Users/zhanglei/camel_database_agent_data 241 | Train knowledge Took 0.1063 seconds 242 | Database Overview 243 | ================================================== 244 | This database is designed to support a digital music platform, encompassing key features for artist management, employee administration, customer relations, and sales transactions. 245 | 246 | ### Key Features: 247 | 248 | 1. **Artist and Album Management**: 249 | The `Artist` and `Album` tables form the foundation for managing musical artists and their respective albums. Each artist is uniquely identified and can have multiple albums linked to them, allowing for comprehensive tracking of discographies. 250 | 251 | 2. **Employee and Customer Management**: 252 | The `Employee` table captures detailed information about staff, including their roles, contact details, and reporting structure, which is essential for organizational management. The `Customer` table holds customer profiles, enabling personalized service and facilitating communication through provided contact information. 253 | 254 | 3. **Media Type and Genre Classification**: 255 | The `MediaType` and `Genre` tables classify music tracks, enabling easy filtering and searching for users based on their preferences for specific genres or media types (e.g. digital downloads, CDs). This classification enhances user experience by making music discovery intuitive and engaging. 256 | 257 | 4. **Track and Playlist Management**: 258 | The `Track` table contains detailed attributes for individual music tracks, including duration and pricing. The `Playlist` table allows users to create and manage custom playlists, which can enrich user engagement and retention by providing a personalized listening experience. 259 | 260 | 5. **Sales Tracking and Invoicing**: 261 | The `Invoice` and `InvoiceLine` tables keep track of sales transactions, linking customers with the purchases they make. This structure not only supports effective billing through clear associations between invoices and the tracks purchased but also facilitates revenue tracking and financial reporting. The ability to view total sales and detailed line items allows for comprehensive sales analysis. 262 | 263 | 6. **Flexible Design for Data Relationships**: 264 | Through the use of foreign keys and relationships, such as the linkage between customers and their respective invoices, the database provides a robust structure for maintaining data integrity. The design ensures that all relevant information is easily accessible, promoting efficient database utilization. 265 | 266 | Overall, this database structure provides a complete solution for managing a music platform, supporting critical business functions like customer engagement, sales tracking, and music cataloging. It enables organizations to operate efficiently, ensuring a seamless experience for both customers and internal staff. 
267 | 268 | 269 | Recommendation Question 270 | ================================================== 271 | List all playlists with more than 5 tracks. 272 | What are the sales figures for each month in 2009? 273 | Show each artist and the number of albums they've released. 274 | What is the total revenue generated from invoices for each customer? 275 | Which tracks belong to the album 'Ball to the Wall'? 276 | 277 | 278 | Question to SQL Took 2.8951 seconds 279 | Execute Query SQL Took 0.1036 seconds 280 | +----+--------------+--------------+ 281 | | | PlaylistId | TrackCount | 282 | |----+--------------+--------------| 283 | | 0 | 1 | 3290 | 284 | | 1 | 3 | 213 | 285 | | 2 | 5 | 1477 | 286 | | 3 | 8 | 3290 | 287 | | 4 | 10 | 213 | 288 | | 5 | 11 | 39 | 289 | | 6 | 12 | 75 | 290 | | 7 | 13 | 25 | 291 | | 8 | 14 | 25 | 292 | | 9 | 15 | 25 | 293 | | 10 | 16 | 15 | 294 | | 11 | 17 | 26 | 295 | +----+--------------+--------------+ 296 | SELECT PlaylistId, COUNT(TrackId) as TrackCount FROM PlaylistTrack GROUP BY PlaylistId HAVING TrackCount > 5; 297 | ``` 298 | 299 | ## Spider 2.0-Lite (Planned) 300 | 301 | [Spider 2.0-Lite](https://github.com/xlang-ai/Spider2/tree/main/spider2-lite) is a text-to-SQL evaluation framework that includes 547 real enterprise-level database use cases, involving various database systems such as BigQuery, Snowflake, and SQLite, to assess the ability of language models in converting text to SQL in complex enterprise environments. 302 | 303 | > This use case attempts to query the SQLite database based on user questions 304 | > and evaluate whether the SQL executes smoothly (**without assessing data accuracy**). 305 | 306 | * spider2_lite/database/local_sqlite - SQLite database file. [Manual download required](spider2_lite/database/README.md). 307 | * spider2_lite/spider2-lite.jsonl - Question and SQL pairs. [Link](https://github.com/xlang-ai/Spider2/blob/main/spider2-lite/spider2-lite.jsonl) 308 | * spider2_lite/spider2_run.py - Run the Spider 2.0-Lite evaluation. 309 | 310 | Run the Spider 2.0-Lite evaluation. 311 | 312 | ```shell 313 | cd spider2_lite 314 | export OPENAI_API_KEY=sk-xxx 315 | export OPENAI_API_BASE_URL=https://api.openai.com/v1/ 316 | export MODEL_NAME=gpt-4o-mini 317 | python spider2_run.py 318 | ``` -------------------------------------------------------------------------------- /add_license_headers.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | find camel_database_agent -type f -name "*.py" | while read -r file 4 | do 5 | if ! grep -q Copyright "$file" 6 | then 7 | # cat LICENSE_HEADER.tpl "$file" >"$file.new" && mv "$file.new" "$file" 8 | { cat LICENSE_HEADER.tpl; echo; cat "$file"; } >"$file.new" && mv "$file.new" "$file" 9 | fi 10 | done -------------------------------------------------------------------------------- /camel_database_agent/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Lei Zhang 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | __all__ = [ 16 | "DataQueryInferencePipeline", 17 | "DatabaseAgent", 18 | "DatabaseSchemaDialectMySQL", 19 | "DatabaseSchemaDialectPostgresql", 20 | "DatabaseSchemaDialectSqlite", 21 | ] 22 | 23 | from camel_database_agent.database.dialect.dialect_mysql import ( 24 | DatabaseSchemaDialectMySQL, 25 | ) 26 | from camel_database_agent.database.dialect.dialect_postgresql import ( 27 | DatabaseSchemaDialectPostgresql, 28 | ) 29 | from camel_database_agent.database.dialect.dialect_sqlite import ( 30 | DatabaseSchemaDialectSqlite, 31 | ) 32 | from camel_database_agent.database_agent import DatabaseAgent 33 | from camel_database_agent.datagen.pipeline import ( 34 | DataQueryInferencePipeline, 35 | ) 36 | -------------------------------------------------------------------------------- /camel_database_agent/cli.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Lei Zhang 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | """ 16 | Set pandas display options. 17 | """ 18 | 19 | import argparse 20 | import hashlib 21 | import logging 22 | import os 23 | import sys 24 | import uuid 25 | from threading import Event, Thread 26 | from urllib.parse import urlparse 27 | 28 | import pandas as pd 29 | from camel.embeddings import OpenAICompatibleEmbedding 30 | from camel.models import ModelFactory 31 | from camel.types import ModelPlatformType 32 | from colorama import Fore 33 | from tabulate import tabulate 34 | 35 | from camel_database_agent import DatabaseAgent 36 | from camel_database_agent.database.manager import DatabaseManager 37 | from camel_database_agent.database_agent import DatabaseAgentResponse 38 | from camel_database_agent.database_base import TrainLevel, spinner 39 | 40 | """Logging""" 41 | logging.basicConfig( 42 | level=logging.ERROR, 43 | format="%(message)s", 44 | handlers=[logging.StreamHandler(sys.stdout)], 45 | force=True, 46 | ) 47 | logging.getLogger("camel_database_agent").setLevel(logging.INFO) 48 | logger = logging.getLogger(__name__) 49 | 50 | """Pandas display""" 51 | pd.set_option("display.max_rows", None) # Show all rows 52 | pd.set_option("display.max_columns", None) # Show all columns 53 | pd.set_option("display.width", None) # Auto-detect display width 54 | pd.set_option("display.max_colwidth", None) # Show full content of each cell 55 | 56 | 57 | def generate_db_id(db_url: str, language: str) -> str: 58 | """ 59 | Generate a unique ID from a database URL by hashing relevant parts.
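Example (illustrative only; the actual digest depends on the URL and language): generate_db_id("sqlite:///database/sqlite/music.sqlite", "English") returns a 12-character hex string such as "3f2a9c81d0b4" (hypothetical value).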
60 | 61 | Args: 62 | db_url: SQLAlchemy database connection string 63 | language: Language of the generated knowledge; mixed into the hash so each language keeps a separate ID 64 | Returns: 65 | A unique ID string derived from the database connection 66 | """ 67 | # Parse the database URL 68 | parsed_url = urlparse(db_url) 69 | 70 | # Extract components that uniquely identify this database 71 | dialect = parsed_url.scheme 72 | netloc = parsed_url.netloc 73 | path = parsed_url.path 74 | 75 | # Create a string with the most important identifying information 76 | db_identifier = f"{dialect}:{netloc}{path}:{language}" 77 | 78 | # Create a hash of the identifier 79 | db_hash = hashlib.md5(db_identifier.encode()).hexdigest() 80 | 81 | # Use first 12 characters for a reasonably unique but short ID 82 | short_id = db_hash[:12] 83 | 84 | return short_id 85 | 86 | 87 | def main() -> None: 88 | parser = argparse.ArgumentParser(description="Query the database using natural language.") 89 | parser.add_argument( 90 | "--database-url", 91 | "-d", 92 | required=True, 93 | help="Database URL (e.g., sqlite:///db.sqlite)", 94 | ) 95 | parser.add_argument( 96 | "--openai-api-key", 97 | "-key", 98 | required=False, 99 | default=os.environ.get("OPENAI_API_KEY"), 100 | help="OpenAI API key", 101 | ) 102 | parser.add_argument( 103 | "--openai-api-base-url", 104 | "-url", 105 | required=False, 106 | default=os.environ.get("OPENAI_API_BASE_URL", "https://api.openai.com/v1"), 107 | help="OpenAI API base URL", 108 | ) 109 | parser.add_argument( 110 | "--model-name", 111 | "-m", 112 | required=False, 113 | default=os.environ.get("MODEL_NAME", "gpt-4o-mini"), 114 | help="Model name, such as gpt-3.5-turbo or gpt-4o-mini", 115 | ) 116 | parser.add_argument( 117 | "--embedd-model-name", 118 | "-em", 119 | required=False, 120 | default=os.environ.get("EMBED_MODEL_NAME", "text-embedding-ada-002"), 121 | help="Embedding model name, such as text-embedding-ada-002", 122 | ) 123 | parser.add_argument("--reset-train", "-rt", action="store_true", help="Retrain the knowledge base") 124 | parser.add_argument( 125 | "--read-only", "-ro", action="store_true", default=True, help="SQL read-only mode" 126 | ) 127 | parser.add_argument( 128 | "--language", 129 | "-lang", 130 | required=False, 131 | default="English", 132 | help="The language you used to ask the question, such as English or Chinese.", 133 | ) 134 | parser.add_argument( 135 | "--timeout", 136 | required=False, 137 | type=int, default=1800, 138 | help="The timeout value in seconds for API calls.", 139 | ) 140 | args = parser.parse_args() 141 | 142 | # Create a data directory for the database agent 143 | user_home = os.path.expanduser("~") 144 | data_path = os.path.join( 145 | user_home, "camel_database_agent_data", generate_db_id(args.database_url, args.language) 146 | ) 147 | 148 | # Create a database manager and database agent 149 | database_manager = DatabaseManager(db_url=args.database_url) 150 | database_agent = DatabaseAgent( 151 | interactive_mode=True, 152 | database_manager=database_manager, 153 | model=ModelFactory.create( 154 | model_platform=ModelPlatformType.OPENAI_COMPATIBLE_MODEL, 155 | model_type=args.model_name, 156 | api_key=args.openai_api_key, 157 | url=args.openai_api_base_url, 158 | timeout=args.timeout, 159 | ), 160 | embedding_model=OpenAICompatibleEmbedding( 161 | api_key=args.openai_api_key, 162 | url=args.openai_api_base_url, 163 | model_type=args.embedd_model_name, 164 | ), 165 | language=args.language, 166 | data_path=data_path, 167 | ) 168 | token_usage = database_agent.train_knowledge( 169 | level=TrainLevel.MEDIUM, 170 | reset_train=args.reset_train, 171 | ) 172 | 173 |
print(f"{Fore.GREEN}") 174 | print("=" * 50) 175 | print(f"{Fore.GREEN}Database Overview") 176 | print("=" * 50) 177 | print(f"{database_agent.get_summary()}") 178 | print("=" * 50) 179 | print(f"{Fore.LIGHTYELLOW_EX}Recommendation Question") 180 | print("=" * 50) 181 | print(f"{database_agent.get_recommendation_question()}{Fore.RESET}") 182 | print(f"{Fore.CYAN}=" * 50) 183 | if args.read_only: 184 | print(f"Interactive Database Agent Query ({Fore.GREEN}Read-Only Mode{Fore.RESET})") 185 | else: 186 | print(f"Interactive Database Agent Query ({Fore.LIGHTRED_EX}Read-Write Mode{Fore.RESET})") 187 | print(f"{Fore.CYAN}Type {Fore.RED}'exit' or 'quit'{Fore.RESET} to end the session") 188 | print( 189 | f"{Fore.CYAN}Type {Fore.LIGHTYELLOW_EX}'help'{Fore.RESET} " 190 | f"to get more recommended questions" 191 | ) 192 | print(f"{Fore.CYAN}Training completed, using {token_usage.total_tokens} tokens{Fore.RESET}") 193 | print(f"{Fore.CYAN}=" * 50) 194 | 195 | session_id = str(uuid.uuid4()) 196 | 197 | while True: 198 | user_question = input(f"{Fore.CYAN}Enter your question: {Fore.RESET}") 199 | user_question = user_question.strip() 200 | if user_question.lower() in ["exit", "quit"]: 201 | print(f"{Fore.YELLOW}Exiting interactive mode. Goodbye!{Fore.RESET}") 202 | break 203 | if user_question.lower() == "help": 204 | print(f"{Fore.GREEN}Database Overview") 205 | print("=" * 50) 206 | print(f"{database_agent.get_summary()}") 207 | print(f"{Fore.LIGHTYELLOW_EX}Recommendation Question") 208 | print("=" * 50) 209 | print(f"{database_agent.get_recommendation_question()}{Fore.RESET}") 210 | elif len(user_question) > 0: 211 | stop_spinner = Event() 212 | spinner_thread = Thread(target=spinner, args=(stop_spinner, "Thinking...")) 213 | spinner_thread.daemon = True 214 | try: 215 | # Set up and start the spinner in a separate thread 216 | spinner_thread.start() 217 | 218 | # Ask the database agent 219 | response: DatabaseAgentResponse = database_agent.ask( 220 | session_id=session_id, 221 | question=user_question, 222 | ) 223 | 224 | # Stop the spinner (it will clear the line) 225 | stop_spinner.set() 226 | spinner_thread.join() 227 | 228 | if response.success: 229 | if response.dataset is not None: 230 | data = tabulate( 231 | tabular_data=response.dataset, headers='keys', tablefmt='psql' 232 | ) 233 | print(f"{Fore.GREEN}{data}{Fore.RESET}") 234 | else: 235 | print(f"{Fore.GREEN}No results found.{Fore.RESET}") 236 | print(f"{Fore.YELLOW}{response.sql}{Fore.RESET}") 237 | else: 238 | print(f"{Fore.RED}+ {response.error}{Fore.RESET}") 239 | if response.usage: 240 | print( 241 | f"{Fore.YELLOW}Tokens used: {response.usage['total_tokens']}{Fore.RESET}" 242 | ) 243 | except Exception as e: 244 | # Make sure to stop the spinner on exception 245 | if 'stop_spinner' in locals() and not stop_spinner.is_set(): 246 | stop_spinner.set() 247 | spinner_thread.join() 248 | print(f"{Fore.RED}ERROR: {e}{Fore.RESET}") 249 | 250 | 251 | if __name__ == "__main__": 252 | main() 253 | -------------------------------------------------------------------------------- /camel_database_agent/core/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Lei Zhang 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | -------------------------------------------------------------------------------- /camel_database_agent/core/exceptions.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Lei Zhang 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | 16 | class DataGenerationError(Exception): 17 | """Base class for exceptions raised during data generation.""" 18 | 19 | pass 20 | 21 | 22 | class QueryParsingError(DataGenerationError): 23 | """Raised when a generated query cannot be parsed.""" 24 | 25 | pass 26 | 27 | 28 | class KnowledgeException(Exception): 29 | """Unified exception type for database knowledge errors.""" 30 | 31 | pass 32 | -------------------------------------------------------------------------------- /camel_database_agent/core/method_lru_cache.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Lei Zhang 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from functools import lru_cache 16 | from typing import Any, Callable, TypeVar, cast 17 | 18 | R = TypeVar('R') 19 | 20 | 21 | def method_lru_cache(maxsize: int = 128): 22 | """A decorator that applies lru_cache to a method safely.""" 23 | 24 | def decorator(func: Callable[..., R]) -> Callable[..., R]: 25 | cache = lru_cache(maxsize=maxsize)(func) 26 | 27 | def wrapper(self, *args: Any, **kwargs: Any) -> R: 28 | return cast(R, cache(self, *args, **kwargs)) 29 | 30 | wrapper.cache_clear = cache.cache_clear # type: ignore[attr-defined] 31 | wrapper.cache_info = cache.cache_info # type: ignore[attr-defined] 32 | return wrapper 33 | 34 | return decorator 35 | -------------------------------------------------------------------------------- /camel_database_agent/database/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Lei Zhang 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | -------------------------------------------------------------------------------- /camel_database_agent/database/dialect/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Lei Zhang 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | -------------------------------------------------------------------------------- /camel_database_agent/database/dialect/dialect.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Lei Zhang 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
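# NOTE: concrete dialects register themselves via DatabaseSchemaDialect.register(...)
# at import time (see dialect_mysql.py, dialect_postgresql.py, dialect_sqlite.py),
# so a dialect can be resolved by name. A minimal usage sketch (illustrative;
# assumes the SQLite sample database bundled with this repository):
#
#     from camel_database_agent.database.manager import DatabaseManager
#     from camel_database_agent.database.dialect.dialect import DatabaseSchemaDialect
#     import camel_database_agent  # importing the package registers all dialects
#
#     manager = DatabaseManager(db_url="sqlite:///database/sqlite/music.sqlite")
#     dialect = DatabaseSchemaDialect.get_dialect("sqlite", database_manager=manager)
#     print(dialect.get_schema())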
14 | 15 | import abc 16 | import logging 17 | from typing import ClassVar, List, Optional, Type, TypeVar, Union 18 | 19 | from camel.agents import ChatAgent 20 | from camel.models import BaseModelBackend 21 | from tabulate import tabulate 22 | 23 | from camel_database_agent.database.manager import DatabaseManager 24 | from camel_database_agent.database.prompts import PromptTemplates 25 | 26 | logger = logging.getLogger(__name__) 27 | 28 | T = TypeVar("T", bound="DatabaseSchemaDialect") 29 | 30 | 31 | class DatabaseSchemaDialect(abc.ABC): 32 | dialect_name: str 33 | dialect_map: ClassVar[dict[str, Type["DatabaseSchemaDialect"]]] = {} 34 | schema_polish_agent: Optional[ChatAgent] = None # None when no model is supplied 35 | schema: str 36 | 37 | def __init__( 38 | self, 39 | database_manager: DatabaseManager, 40 | model: Optional[Union[BaseModelBackend, List[BaseModelBackend]]] = None, 41 | ): 42 | self.database_manager = database_manager 43 | if model: 44 | self.schema_polish_agent = ChatAgent( 45 | system_message="You are a database expert, proficient in the " 46 | "SQL syntax of various databases.", 47 | model=model, 48 | ) 49 | 50 | @classmethod 51 | def register(cls, dialect_type: Type[T]) -> Type[T]: 52 | if not issubclass(dialect_type, DatabaseSchemaDialect): 53 | raise TypeError(f"Expected subclass of DatabaseSchemaDialect, got {dialect_type}") 54 | cls.dialect_map[dialect_type.dialect_name] = dialect_type 55 | return dialect_type 56 | 57 | @classmethod 58 | def get_dialect( 59 | cls, 60 | dialect_name: str, 61 | database_manager: DatabaseManager, 62 | model: Optional[Union[BaseModelBackend, List[BaseModelBackend]]] = None, 63 | ) -> "DatabaseSchemaDialect": 64 | dialect_type: Type["DatabaseSchemaDialect"] = cls.dialect_map[dialect_name] 65 | return dialect_type(database_manager=database_manager, model=model) 66 | 67 | def get_polished_schema(self, language: str = "English") -> str: 68 | if self.schema_polish_agent: 69 | prompt = PromptTemplates.POLISH_SCHEMA_OUTPUT_EXAMPLE.replace( 70 | "{{ddl_sql}}", self.get_schema() 71 | ).replace("{{language}}", language) 72 | response = self.schema_polish_agent.step(prompt) 73 | return response.msgs[0].content 74 | else: 75 | return self.get_schema() 76 | 77 | @abc.abstractmethod 78 | def get_schema(self) -> str: 79 | """ 80 | Abstract method that returns the database schema as a string. 81 | Must be implemented by all dialect subclasses. 82 | """ 83 | pass 84 | 85 | @abc.abstractmethod 86 | def get_table_names(self) -> List[str]: 87 | """ 88 | Abstract method that returns the table names in the database. 89 | Must be implemented by all dialect subclasses. 90 | """ 91 | pass 92 | 93 | def get_sampled_data(self, data_samples_size: int = 5) -> str: 94 | """ 95 | Return up to `data_samples_size` rows from each table, formatted as text. 96 | This is a concrete default implementation shared by all dialect subclasses.
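Illustrative shape of the returned text (hypothetical table name): a "## Artist" heading followed by a psql-style grid rendered by tabulate, with one such section per table, separated by blank lines.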
97 | """ 98 | metadata = self.database_manager.get_metadata() 99 | sample_data = [] 100 | 101 | for table_name in metadata.tables: 102 | # table = metadata.tables[table_name] 103 | # column_names = [column.name for column in table.columns] 104 | 105 | sample_query = f"SELECT * FROM {table_name} LIMIT {data_samples_size}" 106 | try: 107 | rows = self.database_manager.select(sample_query) 108 | dataset = tabulate(tabular_data=rows, headers='keys', tablefmt='psql') 109 | sample_data.append(f"## {table_name}\n\n{dataset}") 110 | # for row in rows: 111 | # columns = [] 112 | # values = [] 113 | # 114 | # for col_name in column_names: 115 | # if col_name in row and row[col_name] is not None: 116 | # columns.append(col_name) 117 | # if isinstance(row[col_name], str): 118 | # values.append("'" + row[col_name].replace("'", "''") + "'") 119 | # elif isinstance(row[col_name], (int, float)): 120 | # values.append(str(row[col_name])) 121 | # else: 122 | # values.append(f"'{row[col_name]!s}'") 123 | # 124 | # if columns and values: 125 | # columns_stmt = ', '.join(columns) 126 | # values_stmt = ', '.join(values) 127 | # insert_stmt = ( 128 | # f"INSERT INTO {table_name} ({columns_stmt}) VALUES ({values_stmt});" 129 | # ) 130 | # sample_data_sql.append(insert_stmt) 131 | 132 | except Exception as e: 133 | logger.warning(f"Error sampling data from table {table_name}: {e}") 134 | 135 | return "\n\n".join(sample_data) 136 | -------------------------------------------------------------------------------- /camel_database_agent/database/dialect/dialect_mysql.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Lei Zhang 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
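# NOTE: unlike the PostgreSQL and SQLite dialects, which rebuild DDL from
# SQLAlchemy metadata, this dialect asks MySQL for its native DDL, e.g.
# (illustrative table name):
#
#     SHOW CREATE TABLE Employee;
#
# and concatenates the returned "Create Table" statements into the schema text.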
14 | 15 | from typing import List, Optional, Union 16 | 17 | from camel.models import BaseModelBackend 18 | 19 | from camel_database_agent.database.dialect.dialect import ( 20 | DatabaseSchemaDialect, 21 | ) 22 | from camel_database_agent.database.manager import ( 23 | DatabaseManager, 24 | ) 25 | 26 | 27 | class DatabaseSchemaDialectMySQL(DatabaseSchemaDialect): 28 | dialect_name = "mysql" 29 | 30 | def __init__( 31 | self, 32 | database_manager: DatabaseManager, 33 | model: Optional[Union[BaseModelBackend, List[BaseModelBackend]]] = None, 34 | ): 35 | super().__init__(database_manager=database_manager, model=model) 36 | self.table_names: List[str] = []  # instance-level, so repeated construction does not accumulate names 37 | ddl_statements = [] 38 | for table in self.database_manager.get_metadata().sorted_tables: 39 | self.table_names.append(table.name) 40 | result = database_manager.select(f"SHOW CREATE TABLE {table.name}") 41 | if result: 42 | create_table = result[0]["Create Table"] 43 | ddl_statements.append(create_table + ";") 44 | self.schema = "\n".join(ddl_statements) 45 | 46 | def get_schema(self) -> str: 47 | return self.schema 48 | 49 | def get_table_names(self) -> List[str]: 50 | return self.table_names 51 | 52 | 53 | DatabaseSchemaDialect.register(DatabaseSchemaDialectMySQL) 54 | -------------------------------------------------------------------------------- /camel_database_agent/database/dialect/dialect_postgresql.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Lei Zhang 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License.
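# NOTE: PostgreSQL DDL is reconstructed from SQLAlchemy metadata, and table
# and column comments are recovered via catalog helpers, e.g. (illustrative):
#
#     SELECT obj_description('employee'::regclass, 'pg_class');  -- table comment
#     SELECT col_description('employee'::regclass, 2);           -- column comment
#
# The comments are emitted as COMMENT ON statements appended to each CREATE TABLE block.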
14 | 
15 | from typing import List, Optional, Union
16 | 
17 | from camel.models import BaseModelBackend
18 | 
19 | from camel_database_agent.database.dialect.dialect import (
20 |     DatabaseSchemaDialect,
21 | )
22 | from camel_database_agent.database.manager import DatabaseManager
23 | 
24 | 
25 | class DatabaseSchemaDialectPostgresql(DatabaseSchemaDialect):
26 |     dialect_name = "postgresql"
27 | 
28 |     def __init__(
29 |         self,
30 |         database_manager: DatabaseManager,
31 |         model: Optional[Union[BaseModelBackend, List[BaseModelBackend]]] = None,
32 |     ):
33 |         super().__init__(database_manager=database_manager, model=model)
34 |         # Instance attribute rather than a ClassVar: a class-level list is
35 |         # shared by every instance and would accumulate duplicate names.
36 |         self.table_names: List[str] = []
37 |         ddl_statements = []
38 |         for table in self.database_manager.get_metadata().sorted_tables:
39 |             self.table_names.append(table.name)
40 |             create_stmt = [f"CREATE TABLE {table.name} ("]
41 |             columns = []
42 |             for column in table.columns:
43 |                 col_def = f"    {column.name} {column.type}"
44 |                 if not column.nullable:
45 |                     col_def += " NOT NULL"
46 |                 if column.primary_key:
47 |                     col_def += " PRIMARY KEY"
48 |                 if column.server_default:
49 |                     if hasattr(column.server_default, "arg"):
50 |                         col_def += f" DEFAULT {column.server_default.arg}"
51 |                     else:
52 |                         col_def += f" DEFAULT {column.server_default}"
53 |                 columns.append(col_def)
54 |             create_stmt.append(",\n".join(columns))
55 |             create_stmt.append(");")
56 | 
57 |             # Fetch the table comment (guard against empty result sets)
58 |             result = self.database_manager.select(
59 |                 f"SELECT obj_description('{table.name}'::regclass, 'pg_class')"
60 |             )
61 |             table_comment = result[0]['obj_description'] if result else None
62 |             if table_comment:
63 |                 create_stmt.append(f"COMMENT ON TABLE {table.name} IS '{table_comment}';")
64 | 
65 |             # Fetch each column comment
66 |             for column in table.columns:
67 |                 result = self.database_manager.select(
68 |                     f"SELECT col_description('{table.name}'::regclass, "
69 |                     f"(SELECT ordinal_position FROM information_schema.columns "
70 |                     f"WHERE table_name = '{table.name}' AND column_name = '{column.name}'))"
71 |                 )
72 |                 col_comment = result[0]['col_description'] if result else None
73 |                 if col_comment:
74 |                     create_stmt.append(
75 |                         f"COMMENT ON COLUMN {table.name}.{column.name} IS '{col_comment}';"
76 |                     )
77 | 
78 |             ddl_statements.append("\n".join(create_stmt))
79 |         self.schema = "\n".join(ddl_statements)
80 | 
81 |     def get_schema(self) -> str:
82 |         return self.schema
83 | 
84 |     def get_table_names(self) -> List[str]:
85 |         return self.table_names
86 | 
87 | 
88 | DatabaseSchemaDialect.register(DatabaseSchemaDialectPostgresql)
89 | 
--------------------------------------------------------------------------------
/camel_database_agent/database/dialect/dialect_sqlite.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Lei Zhang
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
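# Aside (a sketch, assuming reading raw DDL is acceptable): SQLite stores the
# verbatim CREATE TABLE text in its sqlite_master catalog, so an alternative
# to rebuilding DDL from reflected metadata would be:
#
#     rows = database_manager.select(
#         "SELECT sql FROM sqlite_master WHERE type = 'table' AND sql IS NOT NULL"
#     )
#     schema = "\n".join(row["sql"] + ";" for row in rows)
#
# The class below reconstructs the schema from SQLAlchemy metadata instead,
# which keeps the output format aligned with the other dialects.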
14 | 
15 | from typing import List, Optional, Union
16 | 
17 | from camel.models import BaseModelBackend
18 | from sqlalchemy import DefaultClause
19 | 
20 | from camel_database_agent.database.dialect.dialect import (
21 |     DatabaseSchemaDialect,
22 | )
23 | from camel_database_agent.database.manager import DatabaseManager
24 | 
25 | 
26 | class DatabaseSchemaDialectSqlite(DatabaseSchemaDialect):
27 |     """
28 |     SQLite doesn't support comments in standard DDL, so the schema here is
29 |     the closest approximation reconstructed from reflected metadata.
30 |     """
31 | 
32 |     dialect_name = "sqlite"
33 | 
34 |     def __init__(
35 |         self,
36 |         database_manager: DatabaseManager,
37 |         model: Optional[Union[BaseModelBackend, List[BaseModelBackend]]] = None,
38 |     ):
39 |         super().__init__(database_manager=database_manager, model=model)
40 |         # Instance attribute rather than a ClassVar: a class-level list is
41 |         # shared by every instance and would accumulate duplicate names.
42 |         self.table_names: List[str] = []
43 |         ddl_statements = []
44 |         for table in self.database_manager.get_metadata().sorted_tables:
45 |             self.table_names.append(table.name)
46 |             create_stmt = f"CREATE TABLE {table.name} (\n"
47 |             columns = []
48 |             for column in table.columns:
49 |                 col_def = f"    {column.name} {column.type}"
50 |                 if not column.nullable:
51 |                     col_def += " NOT NULL"
52 |                 if column.primary_key:
53 |                     col_def += " PRIMARY KEY"
54 |                 if isinstance(column.server_default, DefaultClause):
55 |                     col_def += f" DEFAULT {column.server_default.arg}"
56 |                 columns.append(col_def)
57 |             create_stmt += ",\n".join(columns)
58 |             create_stmt += "\n);"
59 |             ddl_statements.append(create_stmt)
60 |         self.schema = "\n".join(ddl_statements)
61 | 
62 |     def get_schema(self) -> str:
63 |         return self.schema
64 | 
65 |     def get_table_names(self) -> List[str]:
66 |         return self.table_names
67 | 
68 | 
69 | DatabaseSchemaDialect.register(DatabaseSchemaDialectSqlite)
70 | 
--------------------------------------------------------------------------------
/camel_database_agent/database/manager.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Lei Zhang
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import functools
16 | import logging
17 | from contextlib import contextmanager
18 | from typing import Any, Callable, Iterator, List, TypeVar, Union
19 | 
20 | import pandas as pd
21 | from sqlalchemy import MetaData, Result, TextClause, create_engine, text
22 | from sqlalchemy.exc import OperationalError
23 | from sqlalchemy.orm import Session, sessionmaker
24 | 
25 | from camel_database_agent.database_base import SQLExecutionError, timing
26 | 
27 | T = TypeVar("T")
28 | 
29 | logger = logging.getLogger(__name__)
30 | 
31 | read_only_message = (
32 |     "Operation rejected: This SQL contains statements that "
33 |     "could modify data or schema (DROP, DELETE, UPDATE, etc.)"
34 |     " which is not allowed in read-only mode."
35 | )
36 | 
37 | 
38 | @contextmanager
39 | def session_scope(session_maker: sessionmaker) -> Iterator[Session]:
40 |     """Context manager for database session handling."""
41 |     session = session_maker()
42 |     try:
43 |         yield session
44 |         session.commit()
45 |     except Exception:
46 |         session.rollback()
47 |         raise
48 |     finally:
49 |         session.close()
50 | 
51 | 
52 | def with_session(func: Callable) -> Callable:
53 |     """Decorator that handles session creation and cleanup."""
54 | 
55 |     @functools.wraps(func)
56 |     def wrapper(self: Any, *args: Any, **kwargs: Any) -> Any:
57 |         with session_scope(self.Session) as session:
58 |             return func(self, session, *args, **kwargs)
59 | 
60 |     return wrapper
61 | 
62 | 
63 | class DatabaseManager:
64 |     def __init__(self, db_url: str, read_only_model: bool = True):
65 |         self.db_url = db_url
66 |         self.read_only_model = read_only_model
67 |         self.engine = create_engine(db_url)
68 |         self.Session = sessionmaker(bind=self.engine)
69 |         self.metadata = MetaData()
70 |         with self.engine.connect():
71 |             logger.info(f"Successfully connected to database: {db_url}")
72 | 
73 |     @timing
74 |     @with_session
75 |     def select(
76 |         self, session: Session, sql: str, bind_pd: bool = False
77 |     ) -> Union[List[dict], pd.DataFrame]:
78 |         """Execute Query SQL"""
79 |         self._check_sql(sql)
80 |         try:
81 |             result: Result = session.execute(text(sql))
82 |             if bind_pd:
83 |                 return pd.DataFrame(result.fetchall(), columns=list(result.keys()))
84 |             else:
85 |                 # Convert the result rows into a list of dicts
86 |                 column_names = result.keys()
87 |                 rows = [dict(zip(column_names, row)) for row in result]
88 |                 return rows
89 |         except OperationalError as e:
90 |             raise SQLExecutionError(sql, str(e))
91 | 
92 |     @with_session
93 |     def execute(
94 |         self, session: Session, sql: Union[str, List[str]], ignore_sql_check: bool = False
95 |     ) -> bool:
96 |         """Execute one or more UPDATE/INSERT/DELETE statements."""
97 |         if not ignore_sql_check:
98 |             self._check_sql(sql)
99 |         if isinstance(sql, str):
100 |             for statement in sql.split(";"):
101 |                 if statement.strip():
102 |                     session.execute(text(statement))
103 |         else:
104 |             for statement in sql:
105 |                 if statement.strip():
106 |                     session.execute(text(statement.strip()))
107 |         return True
108 | 
109 |     def dialect_name(self) -> str:
110 |         return self.engine.dialect.name
111 | 
112 |     def get_metadata(self) -> MetaData:
113 |         self.metadata.reflect(bind=self.engine)
114 |         return self.metadata
115 | 
116 |     def _check_sql(self, sql: Union[str, List[str]]) -> None:
117 |         """Check if SQL is safe to execute (non-destructive)."""
118 |         if self.read_only_model:
119 |             dangerous_keywords = {
120 |                 # Standalone keywords that modify data/schema
121 |                 "DROP": True,
122 |                 "TRUNCATE": True,
123 |                 "DELETE": True,
124 |                 "UPDATE": True,
125 |                 "INSERT": True,
126 |                 "ALTER": True,
127 |                 "RENAME": True,
128 |                 "REPLACE": True,
129 |                 # CREATE is a special case - some forms are read-only
130 |                 "CREATE": {"SAFE_PREFIXES": ["SHOW CREATE"]},
131 |             }
132 | 
133 |             statements = []
134 |             if isinstance(sql, str):
135 |                 statements = [stmt.strip().upper() for stmt in sql.split(";") if stmt.strip()]
136 |             elif isinstance(sql, TextClause):
137 |                 # Split the clause text into statements; iterating the string
138 |                 # directly would yield single characters.
139 |                 statements = [
140 |                     stmt.strip().upper() for stmt in sql.text.split(";") if stmt.strip()
141 |                 ]
142 |             else:
143 |                 statements = [stmt.strip().upper() for stmt in sql if stmt.strip()]
144 | 
145 |             # Check each statement for dangerous keywords
146 |             for stmt in statements:
147 |                 stmt_upper = stmt.upper()
148 |                 for keyword, config in dangerous_keywords.items():
149 |                     if isinstance(config, bool) and config:
150 |                         if keyword in stmt_upper.split():
151 |                             raise SQLExecutionError('\n'.join(statements), read_only_message)
152 |                     elif isinstance(config, dict):
153 |                         # Handle special cases with exceptions
154 |                         if keyword in stmt_upper.split():
155 |                             is_safe = False
156 |                             for safe_prefix in config.get("SAFE_PREFIXES", []):
157 |                                 if stmt_upper.startswith(safe_prefix):
158 |                                     is_safe = True
159 |                                     break
160 |                             if not is_safe:
161 |                                 raise SQLExecutionError('\n'.join(statements), read_only_message)
162 | 
--------------------------------------------------------------------------------
/camel_database_agent/database/prompts.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Lei Zhang
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import textwrap
16 | 
17 | 
18 | class PromptTemplates:
19 |     POLISH_SCHEMA_OUTPUT_EXAMPLE = textwrap.dedent("""
20 |     Please add detailed {{language}} comments to the following DDL script, explaining the business meaning and design intent of each table and field.
21 | 
22 |     Requirements:
23 |     - Keep the original DDL script completely unchanged
24 |     - Add comments before the script
25 |     - Comments should be professional and concise
26 |     - Use SQL -- comment syntax
27 | 
28 |     DDL Script:
29 |     ```sql
30 |     {{ddl_sql}}
31 |     ```
32 | 
33 |     Output Example:
34 |     ```sql
35 |     -- User Management Table stores basic information and authentication credentials for system users. Applicable scenarios include user registration, login, and permission management.
36 |     CREATE TABLE users (
37 |         id INT AUTO_INCREMENT PRIMARY KEY, -- Unique user identifier, auto-increment ID
38 |         username VARCHAR(50) NOT NULL UNIQUE, -- User login account, 50 character length, ensures uniqueness
39 |         email VARCHAR(100) NOT NULL UNIQUE, -- User email, used for notifications and password recovery, 100 character length
40 |         password VARCHAR(255) NOT NULL, -- User password stored with encryption, recommended to use hash algorithm
41 |         full_name VARCHAR(100), -- User full name, optional field
42 |         created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, -- User account creation timestamp, defaults to current time
43 |         last_login TIMESTAMP NULL, -- Most recent login time, can be initially null
44 |         is_active BOOLEAN DEFAULT TRUE -- Account status flag, default is active
45 |     );
46 |     ```
47 | 
48 |     Key Strategies:
49 |     - Clearly instruct not to modify the original DDL
50 |     - Provide specific guidance for adding comments
51 |     - Specify the expected format and content of comments
52 |     - Emphasize professionalism and conciseness
53 |     """)
54 | 
55 |     PARSE_SAMPLED_RECORD = textwrap.dedent("""
56 |     # JSON Format Request
57 |     You are a specialized JSON generator. Your only function is to parse the provided data and convert it to JSON format, strictly following the format requirements.
58 | 
59 |     ## Input Data:
60 |     {{section}}
61 | 
62 |     ## Instructions:
63 |     1. Create a JSON array with each table as an object
64 |     2.
Each object must have exactly three fields: 65 | - "id": the table name 66 | - "summary": a brief description of the table 67 | - "dataset": the data in markdown format 68 | 3. The entire response must be ONLY valid JSON without any additional text, explanation, or markdown code blocks 69 | 70 | ## Required Output Format: 71 | { 72 | "items":[{ 73 | "id": "", 74 | "summary": "
", 75 | "dataset": "" 76 | }] 77 | } 78 | 79 | ## IMPORTANT: 80 | - Your response must contain ONLY the JSON object, nothing else 81 | - Do not include explanations, introductions, or conclusions 82 | - Do not use markdown code blocks (```) around the JSON 83 | - Do not include phrases like "Here's the JSON" or "I've created the JSON" 84 | - Do not indicate that you are providing the output in any way""") 85 | -------------------------------------------------------------------------------- /camel_database_agent/database/schema.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Lei Zhang 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import logging 16 | import re 17 | import textwrap 18 | from typing import Generic, List, Optional, TypeVar, Union 19 | 20 | from camel.agents import ChatAgent 21 | from camel.models import BaseModelBackend 22 | from pydantic import BaseModel 23 | 24 | from camel_database_agent.database.manager import DatabaseManager 25 | from camel_database_agent.database.prompts import PromptTemplates 26 | from camel_database_agent.database_base import timing 27 | 28 | logger = logging.getLogger(__name__) 29 | 30 | 31 | class DDLRecord(BaseModel): 32 | id: str 33 | summary: str 34 | sql: str 35 | 36 | 37 | class DMLRecord(BaseModel): 38 | id: str 39 | summary: str 40 | dataset: str 41 | 42 | 43 | class QueryRecord(BaseModel): 44 | id: str 45 | question: str 46 | sql: str 47 | 48 | 49 | T = TypeVar('T', bound=BaseModel) 50 | 51 | 52 | class SchemaParseResponse(BaseModel, Generic[T]): 53 | data: List[T] 54 | usage: Optional[dict] 55 | errors: Optional[List[T]] = None 56 | 57 | 58 | class DDLRecordResponseFormat(BaseModel): 59 | items: List[DDLRecord] 60 | 61 | 62 | class DMLRecordResponseFormat(BaseModel): 63 | items: List[DMLRecord] 64 | 65 | 66 | class QueryRecordResponseFormat(BaseModel): 67 | items: List[QueryRecord] 68 | 69 | 70 | class DatabaseSchemaParse: 71 | def __init__( 72 | self, 73 | database_manager: DatabaseManager, 74 | model: Union[BaseModelBackend, List[BaseModelBackend]], 75 | ): 76 | self.database_manager = database_manager 77 | self.parsing_agent = ChatAgent( 78 | system_message="You are a database expert, proficient in the " 79 | "SQL syntax of various databases.", 80 | model=model, 81 | ) 82 | 83 | @timing 84 | def parse_ddl_record(self, text: str) -> SchemaParseResponse: 85 | """Parsing DDL Schema""" 86 | prompt = ( 87 | "Translate the following information into a JSON array format, " 88 | "with each JSON object in the array containing three " 89 | "elements: " 90 | "\"id\" for the table name, " 91 | "\"summary\" for a summary of the table, and " 92 | "\"sql\" for the SQL statement of the table creation.\n\n" 93 | ) 94 | if text.startswith("```sql"): 95 | prompt += f"{text}\n\n" 96 | else: 97 | prompt += f"```sql\n{text}```\n\n" 98 | 99 | # 非 openai 模型要增加以下片段 100 | prompt += textwrap.dedent( 101 | "Output Format:\n" 102 | "{" 103 | " \"items\":" 104 | " 
[" 105 | " {" 106 | " \"id\": \"
\"," 107 | " \"summary\": \"
\"," 108 | " \"sql\": \"
\"" 109 | " }" 110 | " ]" 111 | "}\n\n" 112 | ) 113 | prompt += "Now, directly output the JSON array without explanation." 114 | response = self.parsing_agent.step(prompt, response_format=DDLRecordResponseFormat) 115 | ddl_record_response = DDLRecordResponseFormat.model_validate_json(response.msgs[0].content) 116 | return SchemaParseResponse(data=ddl_record_response.items, usage=response.info["usage"]) 117 | 118 | @timing 119 | def parse_sampled_record(self, text: str) -> SchemaParseResponse: 120 | """Parsing Sampled Data""" 121 | data: List[DMLRecord] = [] 122 | usage: Optional[dict] = None 123 | sections = self.split_markdown_by_h2(text) 124 | for section in sections: 125 | prompt = PromptTemplates.PARSE_SAMPLED_RECORD.replace("{{section}}", section) 126 | try: 127 | self.parsing_agent.reset() 128 | response = self.parsing_agent.step(prompt, response_format=DMLRecordResponseFormat) 129 | dml_record_response = DMLRecordResponseFormat.model_validate_json( 130 | response.msgs[0].content 131 | ) 132 | data.extend(dml_record_response.items) 133 | if usage is None: 134 | usage = response.info["usage"] 135 | else: 136 | usage["completion_tokens"] += response.info["usage"]["completion_tokens"] 137 | usage["prompt_tokens"] += response.info["usage"]["prompt_tokens"] 138 | usage["total_tokens"] += response.info["usage"]["total_tokens"] 139 | except Exception as e: 140 | logger.error(f"Unable to process messages: {e}") 141 | logger.error(f"Prompt: {prompt}") 142 | return SchemaParseResponse(data=data, usage=usage) 143 | 144 | @timing 145 | def parse_query_record(self, text: str) -> SchemaParseResponse: 146 | """Parsing Query SQL statements""" 147 | prompt = ( 148 | "The following is an analysis of user query requirements, " 149 | "from which you need to extract user questions and " 150 | "corresponding SQL statements.\n\n" 151 | ) 152 | prompt += f"```sql\n{text}```\n" 153 | prompt += "Please output the summary information and SQL script in JSON format." 154 | response = self.parsing_agent.step(prompt, response_format=QueryRecordResponseFormat) 155 | query_record_response = QueryRecordResponseFormat.model_validate_json( 156 | response.msgs[0].content 157 | ) 158 | return SchemaParseResponse(data=query_record_response.items, usage=response.info["usage"]) 159 | 160 | def split_markdown_by_h2(self, markdown_text): 161 | sections = re.split(r'(?=^##\s+)', markdown_text, flags=re.MULTILINE) 162 | sections = [section.strip() for section in sections if section.strip()] 163 | return sections 164 | -------------------------------------------------------------------------------- /camel_database_agent/database_agent.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Lei Zhang 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 15 | import json 16 | import logging 17 | import os 18 | import random 19 | from typing import Any, List, Optional, Union, cast 20 | 21 | from camel.agents import BaseAgent, ChatAgent 22 | from camel.embeddings import BaseEmbedding, OpenAIEmbedding 23 | from camel.models import BaseModelBackend, ModelFactory 24 | from camel.types import ModelPlatformType, ModelType 25 | from colorama import Fore 26 | from pydantic import BaseModel 27 | from tabulate import tabulate 28 | 29 | from camel_database_agent.database.dialect.dialect import ( 30 | DatabaseSchemaDialect, 31 | ) 32 | from camel_database_agent.database.manager import DatabaseManager 33 | from camel_database_agent.database.schema import ( 34 | DatabaseSchemaParse, 35 | DDLRecord, 36 | DMLRecord, 37 | QueryRecord, 38 | SchemaParseResponse, 39 | ) 40 | from camel_database_agent.database_base import ( 41 | AssistantMessage, 42 | HumanMessage, 43 | MessageLog, 44 | MessageLogToEmpty, 45 | SQLExecutionError, 46 | TokenUsage, 47 | TrainLevel, 48 | messages_log, 49 | strip_sql_code_block, 50 | timing, 51 | ) 52 | from camel_database_agent.database_prompt import PromptTemplates 53 | from camel_database_agent.datagen.pipeline import ( 54 | DataQueryInferencePipeline, 55 | ) 56 | from camel_database_agent.knowledge.knowledge import DatabaseKnowledge, RecordType 57 | from camel_database_agent.knowledge.knowledge_qdrant import ( 58 | DatabaseKnowledgeQdrant, 59 | ) 60 | 61 | logger = logging.getLogger(__name__) 62 | 63 | 64 | class QuestionMeta(BaseModel): 65 | question: str 66 | sql: str 67 | prompt: str 68 | usage: dict 69 | 70 | 71 | class DatabaseAgentResponse(BaseModel): 72 | ask: str 73 | dataset: Optional[Any] = None 74 | sql: Optional[str] = None 75 | success: bool = True 76 | error: Optional[str] = None 77 | usage: Optional[dict] = None 78 | 79 | 80 | class DatabaseAgent(BaseAgent): 81 | database_summary: str = "" 82 | recommendation_question: str = "" 83 | 84 | def step(self, *args: Any, **kwargs: Any) -> Any: 85 | pass 86 | 87 | def reset(self, *args: Any, **kwargs: Any) -> Any: 88 | pass 89 | 90 | def __init__( 91 | self, 92 | db_url: Optional[str] = None, 93 | ddl_sql: Optional[str] = None, 94 | data_sql: Optional[str] = None, 95 | polished_schema: bool = True, 96 | database_manager: Optional[DatabaseManager] = None, 97 | model: Optional[Union[BaseModelBackend, List[BaseModelBackend]]] = None, 98 | embedding_model: Optional[BaseEmbedding] = None, 99 | database_knowledge: Optional[DatabaseKnowledge] = None, 100 | data_path: Optional[str] = None, 101 | language: str = "English", 102 | interactive_mode: bool = False, 103 | read_only_model: bool = True, 104 | ): 105 | self.interactive_mode = interactive_mode 106 | if database_manager: 107 | self.database_manager = database_manager 108 | else: 109 | if db_url: 110 | self.database_manager = DatabaseManager( 111 | db_url=db_url, read_only_model=read_only_model 112 | ) 113 | else: 114 | raise ValueError("db_url or database_manager must be provided") 115 | self.ddl_sql = ddl_sql 116 | self.data_sql = data_sql 117 | self.language = language 118 | if not data_path: 119 | user_home = os.path.expanduser("~") 120 | data_path = os.path.join(user_home, "camel_database_agent_data") 121 | if not os.path.exists(data_path): 122 | os.makedirs(data_path) 123 | logger.info(f"Workspace: {data_path}") 124 | self.data_path = data_path 125 | self.polished_schema = polished_schema 126 | self.model_backend = ( 127 | model 128 | if model 129 | else ModelFactory.create( 130 | 
model_platform=ModelPlatformType.DEFAULT, 131 | model_type=ModelType.DEFAULT, 132 | ) 133 | ) 134 | self.embedding_model_backend = embedding_model if embedding_model else OpenAIEmbedding() 135 | self.knowledge_path = os.path.join(str(self.data_path), "knowledge") 136 | self.database_knowledge_backend = ( 137 | database_knowledge 138 | if database_knowledge 139 | else DatabaseKnowledgeQdrant( 140 | embedding=self.embedding_model_backend, 141 | model=self.model_backend, 142 | path=self.knowledge_path, 143 | ) 144 | ) 145 | self.dialect: DatabaseSchemaDialect = DatabaseSchemaDialect.get_dialect( 146 | dialect_name=self.database_manager.dialect_name(), 147 | database_manager=self.database_manager, 148 | model=self.model_backend, 149 | ) 150 | self.schema_parse = DatabaseSchemaParse( 151 | database_manager=self.database_manager, model=self.model_backend 152 | ) 153 | self.agent = ChatAgent( 154 | system_message="You are a business expert, skilled at in-depth " 155 | "analysis of user data query requirements through " 156 | "reverse engineering of database table structures.", 157 | model=self.model_backend, 158 | message_window_size=100, 159 | ) 160 | if os.path.exists(os.path.join(self.knowledge_path, "database_summary.txt")): 161 | with open( 162 | os.path.join(self.knowledge_path, "database_summary.txt"), 163 | "r", 164 | encoding="utf-8", 165 | ) as f: 166 | self.database_summary = f.read() 167 | if os.path.exists(os.path.join(self.knowledge_path, "recommendation_question.txt")): 168 | with open( 169 | os.path.join(self.knowledge_path, "recommendation_question.txt"), 170 | "r", 171 | encoding="utf-8", 172 | ) as f: 173 | self.recommendation_question = f.read() 174 | if self.ddl_sql is None and os.path.exists( 175 | os.path.join(self.knowledge_path, "ddl_sql.sql") 176 | ): 177 | with open( 178 | os.path.join(self.knowledge_path, "ddl_sql.sql"), 179 | "r", 180 | encoding="utf-8", 181 | ) as f: 182 | self.ddl_sql = f.read() 183 | if self.data_sql is None and os.path.exists( 184 | os.path.join(self.knowledge_path, "data_sql.sql") 185 | ): 186 | with open( 187 | os.path.join(self.knowledge_path, "data_sql.sql"), 188 | "r", 189 | encoding="utf-8", 190 | ) as f: 191 | self.data_sql = f.read() 192 | 193 | @timing 194 | def _parse_schema_to_knowledge(self, polish: bool = False) -> TokenUsage: 195 | """Generate schema data to knowledge""" 196 | self.ddl_sql = ( 197 | self.dialect.get_polished_schema(self.language) 198 | if polish 199 | else self.dialect.get_schema() 200 | ) 201 | # Save the schema to a file 202 | with open( 203 | os.path.join(self.knowledge_path, "ddl_origin.sql"), 204 | "w", 205 | encoding="utf-8", 206 | ) as f: 207 | f.write(self.dialect.get_schema()) 208 | 209 | # Save the polished schema to a file 210 | with open( 211 | os.path.join(self.knowledge_path, "ddl_sql.sql"), 212 | "w", 213 | encoding="utf-8", 214 | ) as f: 215 | f.write(self.ddl_sql) 216 | 217 | schema_parse_response: SchemaParseResponse = self.schema_parse.parse_ddl_record( 218 | self.ddl_sql 219 | ) 220 | with open( 221 | os.path.join(self.knowledge_path, "ddl_records.json"), 222 | "w", 223 | encoding="utf-8", 224 | ) as f: 225 | f.write( 226 | json.dumps( 227 | [record.model_dump() for record in schema_parse_response.data], 228 | ensure_ascii=False, 229 | indent=4, 230 | ) 231 | ) 232 | 233 | self.database_knowledge_backend.add(schema_parse_response.data) 234 | if schema_parse_response.usage is None: 235 | return TokenUsage(completion_tokens=0, prompt_tokens=0, total_tokens=0) 236 | return TokenUsage( 237 | 
completion_tokens=schema_parse_response.usage["completion_tokens"],
238 |             prompt_tokens=schema_parse_response.usage["prompt_tokens"],
239 |             total_tokens=schema_parse_response.usage["total_tokens"],
240 |         )
241 | 
242 |     @timing
243 |     def _parse_sampled_data_to_knowledge(self, data_samples_size: int = 5) -> TokenUsage:
244 |         """Generate sampled data to knowledge"""
245 |         self.data_sql = self.dialect.get_sampled_data(data_samples_size=data_samples_size)
246 |         with open(
247 |             os.path.join(self.knowledge_path, "data_sql.sql"),
248 |             "w",
249 |             encoding="utf-8",
250 |         ) as f:
251 |             f.write(self.data_sql)
252 | 
253 |         schema_parse_response: SchemaParseResponse = self.schema_parse.parse_sampled_record(
254 |             self.data_sql
255 |         )
256 | 
257 |         with open(
258 |             os.path.join(self.knowledge_path, "data_records.json"),
259 |             "w",
260 |             encoding="utf-8",
261 |         ) as f:
262 |             f.write(
263 |                 json.dumps(
264 |                     [record.model_dump() for record in schema_parse_response.data],
265 |                     ensure_ascii=False,
266 |                     indent=4,
267 |                 )
268 |             )
269 | 
270 |         self.database_knowledge_backend.add(schema_parse_response.data)
271 |         if schema_parse_response.usage is None:
272 |             return TokenUsage(completion_tokens=0, prompt_tokens=0, total_tokens=0)
273 |         return TokenUsage(
274 |             completion_tokens=schema_parse_response.usage["completion_tokens"],
275 |             prompt_tokens=schema_parse_response.usage["prompt_tokens"],
276 |             total_tokens=schema_parse_response.usage["total_tokens"],
277 |         )
278 | 
279 |     @timing
280 |     def _parse_query_to_knowledge(self, query_samples_size: int = 20) -> TokenUsage:
281 |         """Generate some queries to knowledge"""
282 |         if self.ddl_sql and self.data_sql:
283 |             pipeline = DataQueryInferencePipeline(
284 |                 ddl_sql=self.ddl_sql,
285 |                 data_sql=self.data_sql,
286 |                 model=self.model_backend,
287 |                 database_manager=self.database_manager,
288 |                 language=self.language,
289 |             )
290 |             query_records: List[QueryRecord] = []
291 |             usage: Optional[dict] = None
292 |             while len(query_records) < query_samples_size:
293 |                 schema_parse_response: SchemaParseResponse = pipeline.generate(
294 |                     query_samples_size=query_samples_size
295 |                 )
296 |                 usage = schema_parse_response.usage
297 |                 query_records.extend(cast(List[QueryRecord], schema_parse_response.data))
298 |             with open(
299 |                 os.path.join(self.knowledge_path, "question_sql.txt"),
300 |                 "w",
301 |                 encoding="utf-8",
302 |             ) as f:
303 |                 for query_record in query_records:
304 |                     f.write(f"QUESTION: {query_record.question}\nSQL: {query_record.sql}\n\n")
305 | 
306 |             self.database_knowledge_backend.add(query_records)
307 |             if usage is None:
308 |                 return TokenUsage(completion_tokens=0, prompt_tokens=0, total_tokens=0)
309 |             return TokenUsage(
310 |                 completion_tokens=usage["completion_tokens"],
311 |                 prompt_tokens=usage["prompt_tokens"],
312 |                 total_tokens=usage["total_tokens"],
313 |             )
314 |         else:
315 |             raise ValueError("ddl_sql and data_sql must be provided")
316 | 
317 |     @timing
318 |     def _generate_database_summary(self, query_samples_size: int) -> TokenUsage:
319 |         self.ddl_sql = (
320 |             self.dialect.get_polished_schema(self.language)
321 |             if self.polished_schema
322 |             else self.dialect.get_schema()
323 |         )
324 |         query_samples: List[QueryRecord] = (
325 |             self.database_knowledge_backend.get_query_collection_sample(query_samples_size)
326 |         )
327 | 
328 |         prompt = PromptTemplates.DATABASE_SUMMARY_OUTPUT_EXAMPLE
329 |         prompt = prompt.replace("{{ddl_sql}}", self.ddl_sql)
330 |         prompt = prompt.replace("{{language}}", self.language)
331 | 
332 |         response = self.agent.step(prompt)
333 | 
self.database_summary = response.msgs[0].content 334 | with open( 335 | os.path.join(self.knowledge_path, "database_summary.txt"), 336 | "w", 337 | encoding="utf-8", 338 | ) as f: 339 | f.write(self.database_summary) 340 | self.recommendation_question = "\n".join( 341 | [query_sample.question for query_sample in query_samples] 342 | ) 343 | with open( 344 | os.path.join(self.knowledge_path, "recommendation_question.txt"), 345 | "w", 346 | encoding="utf-8", 347 | ) as f: 348 | f.write(self.recommendation_question) 349 | 350 | return TokenUsage( 351 | completion_tokens=response.info['usage']["completion_tokens"], 352 | prompt_tokens=response.info['usage']["prompt_tokens"], 353 | total_tokens=response.info['usage']["total_tokens"], 354 | ) 355 | 356 | def get_summary(self) -> str: 357 | return self.database_summary 358 | 359 | def get_recommendation_question(self, sampled_num: int = 5) -> str: 360 | """ 361 | Returns a string with randomly sampled questions from the recommendation_question list. 362 | 363 | Args: 364 | sampled_num: Number of questions to sample 365 | 366 | Returns: 367 | A string with sampled questions (one per line) 368 | """ 369 | questions = self.recommendation_question.strip().split('\n') 370 | 371 | # Ensure we don't try to sample more questions than available 372 | sampled_num = min(sampled_num, len(questions)) 373 | 374 | # Randomly sample questions 375 | sampled_questions = random.sample(questions, sampled_num) 376 | 377 | return '\n'.join(sampled_questions) 378 | 379 | def add_knowledge(self, records: List[RecordType]) -> None: 380 | self.database_knowledge_backend.add(records) 381 | 382 | @timing 383 | def train_knowledge( 384 | self, 385 | level: TrainLevel = TrainLevel.MEDIUM, 386 | reset_train: bool = False, 387 | ) -> TokenUsage: 388 | """Train knowledge""" 389 | data_samples_size = 20 390 | table_count = len(self.dialect.get_table_names()) 391 | query_samples_size = table_count 392 | if level == TrainLevel.LOW: 393 | query_samples_size = table_count * 2 394 | elif level == TrainLevel.MEDIUM: 395 | query_samples_size = table_count * 5 396 | elif level == TrainLevel.HIGH: 397 | query_samples_size = table_count * 10 398 | 399 | if reset_train and os.path.exists(self.knowledge_path): 400 | self.database_knowledge_backend.clear() 401 | self.ddl_sql = None 402 | self.data_sql = None 403 | self.database_summary = "" 404 | self.recommendation_question = "" 405 | logger.info("Reset knowledge...") 406 | 407 | if ( 408 | self.database_knowledge_backend.get_table_collection_size() == 0 409 | or self.database_knowledge_backend.get_data_collection_size() == 0 410 | or self.database_knowledge_backend.get_query_collection_size() == 0 411 | ): 412 | message = ( 413 | f"Initial knowledge base training on {table_count} tables. 
" 414 | f"It will take {Fore.GREEN}about {int(table_count * 28 / 60)} minutes.{Fore.RESET}" 415 | ) 416 | if self.interactive_mode: 417 | logger.info(f"\r{message}") 418 | else: 419 | logger.info(message) 420 | 421 | token_usage: TokenUsage = TokenUsage() 422 | 423 | if self.database_knowledge_backend.get_table_collection_size() == 0: 424 | token_usage.add_token(self._parse_schema_to_knowledge(polish=self.polished_schema)) 425 | 426 | if self.database_knowledge_backend.get_data_collection_size() == 0: 427 | token_usage.add_token( 428 | self._parse_sampled_data_to_knowledge(data_samples_size=data_samples_size) 429 | ) 430 | 431 | if self.database_knowledge_backend.get_query_collection_size() == 0: 432 | token_usage.add_token(self._parse_query_to_knowledge(query_samples_size)) 433 | 434 | if not self.database_summary or reset_train: 435 | token_usage.add_token( 436 | self._generate_database_summary(query_samples_size=query_samples_size) 437 | ) 438 | 439 | return token_usage 440 | 441 | @timing 442 | def question_to_sql(self, question: str, dialect_name: str) -> QuestionMeta: 443 | """Question to SQL""" 444 | prompt = PromptTemplates.QUESTION_CONVERT_SQL.replace("{{dialect_name}}", dialect_name) 445 | 446 | ddl_records: List[DDLRecord] = self.database_knowledge_backend.query_ddl(question) 447 | prompt = prompt.replace( 448 | "{{table_schema}}", "\n".join([record.sql for record in ddl_records]) 449 | ) 450 | 451 | data_records: List[DMLRecord] = self.database_knowledge_backend.query_data(question) 452 | prompt = prompt.replace( 453 | "{{sample_data}}", "\n".join([record.dataset for record in data_records]) 454 | ) 455 | 456 | query_records: List[QueryRecord] = self.database_knowledge_backend.query_query(question) 457 | prompt = prompt.replace( 458 | "{{qa_pairs}}", 459 | "\n".join( 460 | [f"QUESTION: {record.question}\nSQL: {record.sql}\n\n" for record in query_records] 461 | ), 462 | ) 463 | 464 | prompt = prompt.replace("{{question}}", question) 465 | logger.debug(Fore.GREEN + "PROMPT:" + prompt) 466 | self.agent.reset() 467 | response = self.agent.step(prompt) 468 | 469 | return QuestionMeta( 470 | question=question, 471 | sql=strip_sql_code_block(response.msgs[0].content), 472 | prompt=prompt, 473 | usage=response.info['usage'], 474 | ) 475 | 476 | @messages_log 477 | def ask( 478 | self, 479 | session_id: str, 480 | question: str, 481 | message_log: Optional[MessageLog] = None, 482 | bind_pd: Optional[bool] = True, 483 | ) -> DatabaseAgentResponse: 484 | if not message_log: 485 | message_log = MessageLogToEmpty() 486 | message_log.messages_writer(HumanMessage(session_id=session_id, content=question)) 487 | question_meta = self.question_to_sql( 488 | question=question, 489 | dialect_name=self.database_manager.dialect_name(), 490 | ) 491 | try: 492 | message_log.messages_writer( 493 | AssistantMessage(session_id=session_id, content=question_meta.sql) 494 | ) 495 | dataset = self.database_manager.select(sql=question_meta.sql, bind_pd=bind_pd) 496 | message_log.messages_writer( 497 | AssistantMessage( 498 | session_id=session_id, 499 | content=tabulate(dataset, headers="keys", tablefmt="psql"), 500 | ) 501 | ) 502 | return DatabaseAgentResponse( 503 | ask=question, 504 | dataset=dataset, 505 | sql=question_meta.sql, 506 | usage=question_meta.usage, 507 | ) 508 | except SQLExecutionError as e: 509 | message_log.messages_writer(AssistantMessage(session_id=session_id, content=str(e))) 510 | return DatabaseAgentResponse( 511 | ask=question, 512 | dataset=None, 513 | sql=e.sql, 514 | 
success=False,
515 |                 error=e.error_message,
516 |                 usage=question_meta.usage,
517 |             )
518 |         except Exception as e:
519 |             message_log.messages_writer(AssistantMessage(session_id=session_id, content=str(e)))
520 |             return DatabaseAgentResponse(
521 |                 ask=question,
522 |                 dataset=None,
523 |                 sql=question_meta.sql,
524 |                 success=False,
525 |                 error=str(e),
526 |             )
527 | 
--------------------------------------------------------------------------------
/camel_database_agent/database_base.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Lei Zhang
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | import logging
16 | import os
17 | import sys
18 | import time
19 | from abc import ABC, abstractmethod
20 | from enum import Enum
21 | from functools import wraps
22 | from itertools import cycle
23 | # threading.Event (not asyncio.Event) is the right primitive for signaling
24 | from threading import Event, Thread
25 | from typing import Any, Callable, TypeVar, cast
26 | 
27 | from colorama import Fore
28 | from pydantic import BaseModel
29 | 
30 | logger = logging.getLogger(__name__)
31 | 
32 | 
33 | class TrainLevel(Enum):
34 |     """Enum class for training levels."""
35 | 
36 |     LOW = "low"
37 |     MEDIUM = "medium"
38 |     HIGH = "high"
39 | 
40 | 
41 | class TokenUsage(BaseModel):
42 |     completion_tokens: int = 0
43 |     prompt_tokens: int = 0
44 |     total_tokens: int = 0
45 | 
46 |     def add_token(self, usage: "TokenUsage"):
47 |         self.completion_tokens += usage.completion_tokens
48 |         self.prompt_tokens += usage.prompt_tokens
49 |         self.total_tokens += usage.total_tokens
50 | 
51 | 
52 | class Message(BaseModel):
53 |     session_id: str
54 |     role: str
55 |     content: str
56 | 
57 | 
58 | class HumanMessage(Message):
59 |     role: str = "user"
60 | 
61 | 
62 | class AssistantMessage(Message):
63 |     role: str = "assistant"
64 | 
65 | 
66 | class MessageLog(ABC):
67 |     @abstractmethod
68 |     def messages_writer(self, message: Message) -> None:
69 |         raise NotImplementedError
70 | 
71 | 
72 | class MessageLogToEmpty(MessageLog):
73 |     def messages_writer(self, message: Message) -> None:
74 |         pass
75 | 
76 | 
77 | class MessageLogToFile(MessageLog):
78 |     def __init__(self, f: Any):
79 |         self.f = f
80 | 
81 |     def messages_writer(self, message: Message) -> None:
82 |         self.f.write(message.model_dump_json() + "\n")
83 | 
84 | 
85 | class SQLExecutionError(Exception):
86 |     """Exception raised for SQL execution errors.
87 | 88 | Attributes: 89 | sql -- the SQL statement that caused the error 90 | error_message -- explanation of the error 91 | """ 92 | 93 | def __init__(self, sql: str, error_message: str): 94 | self.sql = sql 95 | self.error_message = error_message 96 | super().__init__(f"SQL execution error: {error_message}\nSQL: {sql}") 97 | 98 | 99 | T = TypeVar("T", bound=Callable[..., Any]) 100 | 101 | 102 | def spinner(stop_event, message=""): 103 | spinner_chars = ['⣾', '⣽', '⣻', '⢿', '⡿', '⣟', '⣯', '⣷'] 104 | for char in cycle(spinner_chars): 105 | if stop_event.is_set(): 106 | break 107 | sys.stdout.write(f"\r{Fore.LIGHTGREEN_EX}{message}{char}{Fore.RESET}") 108 | sys.stdout.flush() 109 | time.sleep(0.1) 110 | # Clear the entire line before exiting 111 | sys.stdout.write('\r' + ' ' * 100 + '\r') 112 | sys.stdout.flush() 113 | 114 | 115 | def timing(func: T) -> T: 116 | @wraps(func) 117 | def timing_wrapper(*args: Any, **kwargs: Any) -> Any: 118 | info = func.__name__ 119 | func_doc = func.__doc__ 120 | if func_doc: 121 | info = func_doc 122 | start_time = time.perf_counter() 123 | 124 | stop_spinner = Event() 125 | spinner_thread = Thread(target=spinner, args=(stop_spinner, "Thinking...")) 126 | spinner_thread.daemon = True 127 | try: 128 | spinner_thread.start() 129 | result = func(*args, **kwargs) 130 | finally: 131 | # sys.stdout.write('\r' + ' ' * 100 + '\r') 132 | stop_spinner.set() 133 | spinner_thread.join() 134 | end_time = time.perf_counter() 135 | total_time = end_time - start_time 136 | logger.info(f"\r{info} Took {Fore.GREEN}{total_time:.4f} seconds{Fore.RESET}") 137 | return result 138 | 139 | return cast(T, timing_wrapper) 140 | 141 | 142 | def messages_log(func: T) -> T: 143 | @wraps(func) 144 | def wrapper(*args: Any, **kwargs: Any) -> Any: 145 | session_path = os.path.join( 146 | str(args[0].data_path), str(kwargs.get("session_id", "default")) 147 | ) 148 | if not os.path.exists(session_path): 149 | os.makedirs(session_path, exist_ok=True) 150 | 151 | with open(os.path.join(session_path, "messages.jsonl"), "a", encoding="utf-8") as f: 152 | kwargs["message_log"] = MessageLogToFile(f) 153 | return func(*args, **kwargs) 154 | 155 | return cast(T, wrapper) 156 | 157 | 158 | def strip_sql_code_block(sql: str) -> str: 159 | """Remove Markdown SQL code block delimiters from the given string.""" 160 | sql = sql.strip() 161 | if sql.startswith("```sql"): 162 | sql = sql[6:] 163 | if sql.endswith("```"): 164 | sql = sql[:-3] 165 | return sql.strip() # Add extra strip to remove any whitespace after delimiters 166 | -------------------------------------------------------------------------------- /camel_database_agent/database_prompt.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Lei Zhang 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import textwrap 16 | 17 | 18 | class PromptTemplates: 19 | DATABASE_SUMMARY_OUTPUT_EXAMPLE = textwrap.dedent(""" 20 | You are a business database expert. 
Please generate a {{language}} database summary based on the following table structure, with the aim of helping people understand what information this database can provide from a business perspective.
21 | 
22 |     ## Table Schema
23 |     {{ddl_sql}}
24 | 
25 |     ## Output Example
26 | 
27 |     This database is the core data model of a typical e-commerce system,
28 |     including modules for user management, product management, order transactions,
29 |     payment processes, and address management.
30 | 
31 |     It achieves a complete business loop through multi-table associations
32 |     (such as user-order-product-payment), supporting users throughout
33 |     the entire process from registration, browsing products,
34 |     placing orders and making payments to receiving goods.
35 | 
36 |     Each table ensures data consistency through foreign key constraints
37 |     (such as the strong association between orders and users or addresses)
38 |     and includes timestamp fields (`created_at`/`updated_at`) for tracking data changes.
39 | 
40 |     Now, you only need to output a descriptive text in {{language}}.
41 |     """)
42 | 
43 |     QUESTION_CONVERT_SQL = textwrap.dedent("""
44 |     The following is the table structure in the database, together with some common SQL queries. Please convert the user's question into a SQL query statement that complies with {{dialect_name}} syntax. Do not explain; just provide the SQL directly.
45 | 
46 |     Database System: {{dialect_name}}
47 | 
48 |     ## Table Schema
49 |     ```sql
50 |     {{table_schema}}
51 |     ```
52 | 
53 |     ## Data Example
54 |     ```sql
55 |     {{sample_data}}
56 |     ```
57 |     ## Few-Shot Example
58 |     {{qa_pairs}}
59 | 
60 |     ## User Question
61 |     {{question}}
62 | 
63 |     ## Instructions
64 |     1. Follow {{dialect_name}} syntax
65 |     2. Do not provide explanations, just give the SQL statement directly
66 |     """)
67 | 
--------------------------------------------------------------------------------
/camel_database_agent/datagen/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Lei Zhang
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | 
15 | 
--------------------------------------------------------------------------------
/camel_database_agent/datagen/pipeline.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Lei Zhang
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
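# Usage sketch (ddl_sql/data_sql abbreviated; a model backend or
# OPENAI_API_KEY is assumed). The pipeline asks an LLM for question/SQL
# pairs, executes every candidate against the live database, and keeps only
# the queries that actually run:
#
#     pipeline = DataQueryInferencePipeline(
#         ddl_sql=ddl_sql,            # full CREATE TABLE script
#         data_sql=data_sql,          # sampled rows rendered as markdown
#         database_manager=manager,
#         language="English",
#     )
#     result = pipeline.generate(query_samples_size=20)
#     for record in result.data:      # validated QueryRecord items
#         print(record.question, "->", record.sql)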
14 | 
15 | import logging
16 | from typing import List, Optional, Union
17 | 
18 | from camel.agents import ChatAgent
19 | from camel.models import BaseModelBackend, ModelFactory
20 | from camel.types import ModelPlatformType, ModelType
21 | from colorama import Fore
22 | 
23 | from camel_database_agent.core.exceptions import QueryParsingError
24 | from camel_database_agent.database.manager import DatabaseManager
25 | from camel_database_agent.database.schema import (
26 |     QueryRecord,
27 |     QueryRecordResponseFormat,
28 |     SchemaParseResponse,
29 | )
30 | from camel_database_agent.database_base import SQLExecutionError, timing
31 | from camel_database_agent.datagen.prompts import PromptTemplates
32 | 
33 | logger = logging.getLogger(__name__)
34 | 
35 | 
36 | class DataQueryInferencePipeline:
37 |     def __init__(
38 |         self,
39 |         ddl_sql: str,
40 |         data_sql: str,
41 |         database_manager: DatabaseManager,
42 |         model: Optional[Union[BaseModelBackend, List[BaseModelBackend]]] = None,
43 |         language: str = "English",
44 |         prompt_templates: Optional[PromptTemplates] = None,
45 |     ):
46 |         self.model_backend = (
47 |             model
48 |             if model
49 |             else ModelFactory.create(
50 |                 model_platform=ModelPlatformType.DEFAULT,
51 |                 model_type=ModelType.DEFAULT,
52 |             )
53 |         )
54 |         self.ddl_sql = ddl_sql
55 |         self.data_sql = data_sql
56 |         self.database_manager = database_manager
57 |         self.prompt_templates = prompt_templates or PromptTemplates()
58 |         self.question_agent = ChatAgent(
59 |             system_message="You are a business expert, skilled at deeply "
60 |             "analyzing user data query requirements based on "
61 |             "database table structures.",
62 |             model=self.model_backend,  # resolved backend, not the raw argument
63 |             output_language=language,
64 |         )
65 | 
66 |     def _prepare_prompt(self, query_samples_needed: int) -> str:
67 |         """Prepare the prompt for generating queries."""
68 |         prompt = self.prompt_templates.QUESTION_INFERENCE_PIPELINE
69 |         prompt = prompt.replace("{{ddl_sql}}", self.ddl_sql)
70 |         prompt = prompt.replace("{{data_sql}}", self.data_sql)
71 |         prompt = prompt.replace("{{query_samples_size}}", str(query_samples_needed))
72 |         prompt = prompt.replace("{{dialect_name}}", self.database_manager.dialect_name())
73 |         return prompt
74 | 
75 |     def _parse_response_content(self, content: str) -> List[QueryRecord]:
76 |         """Parse the response content into a list of QueryRecords."""
77 |         if content.startswith("```json") or content.startswith("```"):
78 |             content = content.split("\n", 1)[1]  # Remove the opening ```json fence
79 |         if content.endswith("```"):
80 |             content = content.rsplit("\n", 1)[0]  # Remove the closing ``` fence
81 | 
82 |         try:
83 |             structured_response = QueryRecordResponseFormat.model_validate_json(content)
84 |             return structured_response.items
85 |         except Exception as e:
86 |             raise QueryParsingError(f"Failed to parse response: {e!s}")
87 | 
88 |     def _validate_query(self, query_record: QueryRecord) -> bool:
89 |         """Verify whether the query is executable."""
90 |         try:
91 |             self.database_manager.select(query_record.sql)
92 |             return True
93 |         except SQLExecutionError as e:
94 |             logger.debug(f"{Fore.RED}SQLExecutionError{Fore.RESET}: {e.sql} {e.error_message}")
95 |             return False
96 |         except Exception as e:
97 |             logger.error(
98 |                 f"An error occurred while executing the query: "
99 |                 f"{query_record.question} {query_record.sql} {e!s}"
100 |             )
101 |             return False
102 | 
103 |     @timing
104 |     def generate(self, query_samples_size: int = 20) -> SchemaParseResponse:
105 |         """Data generation for samples"""
106 | 
107 |         dataset: List[QueryRecord] = []
108 |         usage: Optional[dict] = None
109 |         error_query_records:
List[QueryRecord] = [] 110 | 111 | while len(dataset) < query_samples_size: 112 | try: 113 | # Calculate the number of samples to be generated this time. 114 | samples_needed = query_samples_size - len(dataset) 115 | prompt = self._prepare_prompt(samples_needed) 116 | 117 | response = self.question_agent.step( 118 | prompt, response_format=QueryRecordResponseFormat 119 | ) 120 | if response.info and 'usage' in response.info: 121 | usage = response.info['usage'] 122 | content = response.msgs[0].content.strip() 123 | 124 | # Analyze response content 125 | query_records = self._parse_response_content(content) 126 | 127 | # Validate and collect valid queries. 128 | for item in query_records: 129 | if self._validate_query(item): 130 | dataset.append(item) 131 | logger.info( 132 | f"Sample collection progress: " 133 | f"{Fore.GREEN}{len(dataset)}/{query_samples_size}{Fore.RESET}" 134 | ) 135 | else: 136 | error_query_records.append(item) 137 | 138 | # If there are multiple consecutive instances without valid 139 | # samples, consider redesigning the prompt or exiting early. 140 | 141 | except QueryParsingError as e: 142 | logger.error(f"Failed to parse response: {e!s}") 143 | except Exception as e: 144 | logger.error(f"An unexpected error occurred while generating the sample: {e!s}") 145 | 146 | return SchemaParseResponse( 147 | data=dataset[:query_samples_size], 148 | usage=usage, 149 | errors=error_query_records if error_query_records else None, 150 | ) 151 | -------------------------------------------------------------------------------- /camel_database_agent/datagen/prompts.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Lei Zhang 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | import textwrap 16 | 17 | 18 | class PromptTemplates: 19 | QUESTION_INFERENCE_PIPELINE = textwrap.dedent(""" 20 | # JSON Format Request 21 | 22 | You are a specialized JSON generator. Your only function is to parse the provided data and convert it to JSON format, strictly following the format requirements. 23 | 24 | ## Database Schema: 25 | ``` 26 | {{ddl_sql}} 27 | ``` 28 | 29 | ## Data Example: 30 | ```sql 31 | {{data_sql}} 32 | ``` 33 | 34 | ## Instructions: 35 | Database System: {{dialect_name}} 36 | 1. Please carefully analyze the following database information and conduct an in-depth analysis from a business perspective. What business query questions might users raise? Please fully consider some complex query scenarios, including but not limited to multi-table associations, grouping statistics, etc. 37 | 2. Please ensure that the SQL you write conforms to {{dialect_name}} syntax. 38 | 3. Generate {{query_samples_size}} real user query questions along with the corresponding SQL query statements without using placeholders 39 | 4. Create a JSON array with each table as an object 40 | 5. Each object must have exactly three fields: 41 | - "id": the table name 42 | - "question": a query in natural language. 
43 | - "sql": sql statements without placeholders. 44 | 6. The entire response must be ONLY valid JSON without any additional text, explanation, or markdown code blocks 45 | 46 | ## Required Output Format: 47 | { 48 | "items":[{ 49 | "id": "
", 50 | "question": "", 51 | "sql": "" 52 | }] 53 | } 54 | 55 | ## IMPORTANT: 56 | - Your response must contain ONLY the JSON object, nothing else 57 | - Do not include explanations, introductions, or conclusions 58 | - Do not use markdown code blocks (```) around the JSON 59 | - Do not include phrases like "Here's the JSON" or "I've created the JSON" 60 | - Do not indicate that you are providing the output in any way.""") 61 | -------------------------------------------------------------------------------- /camel_database_agent/knowledge/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Lei Zhang 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | -------------------------------------------------------------------------------- /camel_database_agent/knowledge/knowledge.py: -------------------------------------------------------------------------------- 1 | # Copyright 2025 Lei Zhang 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 
14 | 
15 | from abc import ABC, abstractmethod
16 | from typing import Any, Dict, Generic, List, Type, TypeVar, Union
17 | 
18 | from camel.agents import ChatAgent
19 | from camel.embeddings import BaseEmbedding
20 | from camel.models import BaseModelBackend
21 | from camel.storages import (
22 |     BaseVectorStorage,
23 |     VectorDBQuery,
24 |     VectorDBQueryResult,
25 |     VectorRecord,
26 | )
27 | 
28 | from camel_database_agent.core.exceptions import KnowledgeException
29 | from camel_database_agent.core.method_lru_cache import method_lru_cache
30 | from camel_database_agent.database.schema import (
31 |     DDLRecord,
32 |     DMLRecord,
33 |     QueryRecord,
34 | )
35 | 
36 | RecordType = TypeVar("RecordType", DDLRecord, DMLRecord, QueryRecord)
37 | T = TypeVar("T", DDLRecord, DMLRecord, QueryRecord)
38 | 
39 | 
40 | class DatabaseKnowledge(ABC, Generic[T]):
41 |     def __init__(
42 |         self,
43 |         embedding: BaseEmbedding,
44 |         model: Union[BaseModelBackend, List[BaseModelBackend]],
45 |         table_storage: BaseVectorStorage,
46 |         data_storage: BaseVectorStorage,
47 |         query_storage: BaseVectorStorage,
48 |         **data: Any,
49 |     ):
50 |         super().__init__(**data)
51 |         self.embedding = embedding
52 |         self.table_storage = table_storage
53 |         self.data_storage = data_storage
54 |         self.query_storage = query_storage
55 |         self.ddl_parsing_agent = ChatAgent(
56 |             system_message="You are a database expert, skilled at parsing "
57 |             "DDL statements, extracting key information, and "
58 |             "converting it into JSON format.",
59 |             model=model,
60 |             message_window_size=10,
61 |         )
62 | 
63 |         # Mapping from record type to its storage backend
64 |         self._storage_map: Dict[Type[RecordType], BaseVectorStorage] = {  # type: ignore[valid-type]
65 |             DDLRecord: self.table_storage,
66 |             DMLRecord: self.data_storage,
67 |             QueryRecord: self.query_storage,
68 |         }
69 | 
70 |         # Mapping from record type to the field whose content gets embedded
71 |         self._embed_field_map: Dict[Type[RecordType], str] = {  # type: ignore[valid-type]
72 |             DDLRecord: "summary",
73 |             DMLRecord: "summary",
74 |             QueryRecord: "question",
75 |         }
76 | 
77 |     def add(self, records: List[T]) -> None:
78 |         """Add records to their corresponding storage."""
79 |         # Group records by type
80 |         grouped_records: Dict[Type[RecordType], List[RecordType]] = {}  # type: ignore[valid-type]
81 |         for record in records:
82 |             record_type = type(record)
83 |             if record_type not in self._storage_map:
84 |                 raise KnowledgeException(f"Unsupported record type: {record_type}")
85 | 
86 |             if record_type not in grouped_records:
87 |                 grouped_records[record_type] = []
88 |             grouped_records[record_type].append(record)
89 | 
90 |         # Create vector records for each type and add them to storage
91 |         for record_type, type_records in grouped_records.items():
92 |             storage = self._storage_map[record_type]
93 |             embed_field = self._embed_field_map[record_type]
94 | 
95 |             try:
96 |                 v_records = [
97 |                     VectorRecord(
98 |                         vector=self.embedding.embed(getattr(record, embed_field)),
99 |                         payload=record.model_dump(),  # type: ignore[attr-defined]
100 |                     )
101 |                     for record in type_records
102 |                 ]
103 |                 storage.add(v_records)
104 |             except Exception as e:
105 |                 raise KnowledgeException(f"Error while adding records: {e!s}")
106 | 
107 |     @method_lru_cache(maxsize=128)
108 |     def _generic_query(self, query: str, record_type: Type[T], top_k: int = 8) -> List[T]:
109 |         """General query method, supports caching."""
110 |         storage = self._storage_map.get(record_type)
111 |         if not storage:
112 |             raise KnowledgeException(f"No storage found for record type {record_type.__name__}")
113 | 
114 |         try:
115 |             query_vector = self.embedding.embed(query)
116 |             vector_result: List[VectorDBQueryResult] = storage.query(
117 |                 VectorDBQuery(query_vector=query_vector, top_k=top_k)
118 |             )
119 | 
120 |             records = []
121 |             for result in vector_result:
122 |                 if result.record.payload is not None:
123 |                     record: T = record_type(**result.record.payload)
124 |                     records.append(record)
125 |             return records
126 |         except Exception as e:
127 |             raise KnowledgeException(f"Error while querying {record_type.__name__}: {e!s}")
128 | 
129 |     def query_ddl(self, query: str, top_k: int = 8) -> List[DDLRecord]:
130 |         """Query DDL records."""
131 |         return self._generic_query(query, DDLRecord, top_k)
132 | 
133 |     def query_data(self, query: str, top_k: int = 8) -> List[DMLRecord]:
134 |         """Query DML records."""
135 |         return self._generic_query(query, DMLRecord, top_k)
136 | 
137 |     def query_query(self, query: str, top_k: int = 8) -> List[QueryRecord]:
138 |         """Query stored question/SQL records."""
139 |         return self._generic_query(query, QueryRecord, top_k)
140 | 
141 |     @abstractmethod
142 |     def clear(self) -> None:
143 |         """Clear all stored data."""
144 |         raise NotImplementedError
145 | 
146 |     @abstractmethod
147 |     def get_table_collection_size(self) -> int:
148 |         """Get the size of the table collection."""
149 |         raise NotImplementedError
150 | 
151 |     @abstractmethod
152 |     def get_data_collection_size(self) -> int:
153 |         """Get the size of the data collection."""
154 |         raise NotImplementedError
155 | 
156 |     @abstractmethod
157 |     def get_query_collection_size(self) -> int:
158 |         """Get the size of the query collection."""
159 |         raise NotImplementedError
160 | 
161 |     @abstractmethod
162 |     def get_query_collection_sample(self, n: int = 20) -> List[QueryRecord]:
163 |         """Get a sample of records from the query collection."""
164 |         raise NotImplementedError
165 | 
--------------------------------------------------------------------------------
/camel_database_agent/knowledge/knowledge_qdrant.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Lei Zhang
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | #     http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
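# Overview: Qdrant-backed implementation of DatabaseKnowledge. It provisions
# three collections ("table_documents", "data_documents", "query_documents"),
# each sized to the embedding's output dimension; when `path` is None, Qdrant
# runs in memory and nothing persists between runs. A minimal construction
# sketch (the embedding/model variables are placeholders):
#
#     knowledge = DatabaseKnowledgeQdrant(
#         embedding=embedding,    # any camel BaseEmbedding implementation
#         model=model,            # any camel BaseModelBackend
#         path="qdrant_data",     # omit or pass None to stay in memory
#     )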
14 | 15 | import logging 16 | import random 17 | from typing import List, Optional, Union 18 | 19 | from camel.embeddings import BaseEmbedding 20 | from camel.models import BaseModelBackend 21 | from camel.storages import QdrantStorage 22 | from qdrant_client.conversions.common_types import CollectionInfo 23 | 24 | from camel_database_agent.database.schema import QueryRecord 25 | from camel_database_agent.knowledge.knowledge import DatabaseKnowledge 26 | 27 | logger = logging.getLogger(__name__) 28 | 29 | 30 | class DatabaseKnowledgeQdrant(DatabaseKnowledge): 31 | def __init__( 32 | self, 33 | embedding: BaseEmbedding, 34 | model: Union[BaseModelBackend, List[BaseModelBackend]], 35 | path: Optional[str] = None, 36 | ): 37 | self.path = path 38 | try: 39 | table_storage = QdrantStorage( 40 | vector_dim=embedding.get_output_dim(), 41 | collection_name="table_documents", 42 | path=path if path else ":memory:", 43 | ) 44 | data_storage = QdrantStorage( 45 | vector_dim=embedding.get_output_dim(), 46 | collection_name="data_documents", 47 | path=path if path else ":memory:", 48 | ) 49 | query_storage = QdrantStorage( 50 | vector_dim=embedding.get_output_dim(), 51 | collection_name="query_documents", 52 | path=path if path else ":memory:", 53 | ) 54 | except ValueError as e: 55 | logger.error( 56 | "Adjust your embedding model to output vectors with " 57 | "the same dimensions as the existing collection. " 58 | "Alternatively, delete the existing collection and " 59 | "recreate it with your current embedding dimensions " 60 | "(note: this will result in the loss of all existing " 61 | "data)." 62 | ) 63 | raise e 64 | super().__init__( 65 | embedding=embedding, 66 | model=model, 67 | table_storage=table_storage, 68 | data_storage=data_storage, 69 | query_storage=query_storage, 70 | ) 71 | 72 | def clear(self) -> None: 73 | self.table_storage.clear() 74 | self.data_storage.clear() 75 | self.query_storage.clear() 76 | 77 | def get_table_collection_size(self) -> int: 78 | collection_info: CollectionInfo = self.table_storage.client.get_collection( 79 | "table_documents" 80 | ) 81 | return collection_info.points_count if collection_info.points_count else 0 82 | 83 | def get_data_collection_size(self) -> int: 84 | collection_info: CollectionInfo = self.data_storage.client.get_collection("data_documents") 85 | return collection_info.points_count if collection_info.points_count else 0 86 | 87 | def get_query_collection_size(self) -> int: 88 | collection_info: CollectionInfo = self.query_storage.client.get_collection( 89 | "query_documents" 90 | ) 91 | return collection_info.points_count if collection_info.points_count else 0 92 | 93 | def get_query_collection_sample(self, n: int = 20) -> List[QueryRecord]: 94 | # Get actual point IDs from the collection 95 | collection_info = self.query_storage.client.scroll( 96 | collection_name="query_documents", 97 | limit=self.get_query_collection_size(), 98 | ) 99 | point_ids = [point.id for point in collection_info[0]] 100 | 101 | # Sample n random IDs from actual IDs 102 | random_ids = random.sample(point_ids, min(n, len(point_ids))) 103 | 104 | # Retrieve points using correct IDs 105 | search_result = self.query_storage.client.retrieve("query_documents", ids=random_ids) 106 | return [QueryRecord(**record.payload) for record in search_result] 107 | -------------------------------------------------------------------------------- /camel_database_agent/py.typed: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/camel_database_agent/py.typed -------------------------------------------------------------------------------- /conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | from dotenv import load_dotenv 5 | 6 | 7 | @pytest.fixture(scope="session", autouse=True) 8 | def load_env() -> None: 9 | load_dotenv(os.path.join(os.path.dirname(__file__), ".env")) 10 | -------------------------------------------------------------------------------- /database/mysql/1_ddl_sql.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE Buildings ( 2 | BuildingCode VARCHAR(50) NOT NULL PRIMARY KEY COMMENT '建筑物代码', 3 | BuildingName TEXT COMMENT '建筑物名称', 4 | NumberOfFloors INT COMMENT '楼层数量', 5 | ElevatorAccess TINYINT(1) NOT NULL DEFAULT 0 COMMENT '电梯可用性', 6 | SiteParkingAvailable TINYINT(1) NOT NULL DEFAULT 0 COMMENT '场地停车可用性' 7 | ) COMMENT = '建筑物信息表'; 8 | 9 | CREATE TABLE Categories ( 10 | CategoryID VARCHAR(50) NOT NULL PRIMARY KEY COMMENT '类别ID', 11 | CategoryDescription TEXT COMMENT '类别描述', 12 | DepartmentID INT DEFAULT 0 COMMENT '部门ID' 13 | ) COMMENT = '类别信息表'; 14 | 15 | CREATE TABLE Majors ( 16 | MajorID INT NOT NULL PRIMARY KEY COMMENT '专业ID', 17 | Major TEXT COMMENT '专业名称' 18 | ) COMMENT = '专业信息表'; 19 | 20 | CREATE TABLE Staff ( 21 | StaffID INT NOT NULL PRIMARY KEY COMMENT '员工ID', 22 | StfFirstName TEXT COMMENT '员工名', 23 | StfLastname TEXT COMMENT '员工姓', 24 | StfStreetAddress TEXT COMMENT '员工街道地址', 25 | StfCity TEXT COMMENT '员工城市', 26 | StfState TEXT COMMENT '员工州', 27 | StfZipCode TEXT COMMENT '员工邮政编码', 28 | StfAreaCode TEXT COMMENT '员工区号', 29 | StfPhoneNumber TEXT COMMENT '员工电话号码', 30 | Salary FLOAT COMMENT '薪资', 31 | DateHired DATE COMMENT '雇佣日期', 32 | Position TEXT COMMENT '职位' 33 | ) COMMENT = '员工信息表'; 34 | 35 | CREATE TABLE Student_Class_Status ( 36 | ClassStatus INT NOT NULL PRIMARY KEY COMMENT '班级状态ID', 37 | ClassStatusDescription TEXT COMMENT '班级状态描述' 38 | ) COMMENT = '班级状态信息表'; 39 | 40 | CREATE TABLE Class_Rooms ( 41 | ClassRoomID INT NOT NULL PRIMARY KEY COMMENT '教室ID', 42 | BuildingCode VARCHAR(50) COMMENT '建筑物代码', 43 | PhoneAvailable TINYINT(1) NOT NULL DEFAULT 0 COMMENT '电话可用性' 44 | ) COMMENT = '教室信息表'; 45 | 46 | CREATE TABLE Departments ( 47 | DepartmentID INT NOT NULL PRIMARY KEY COMMENT '部门ID', 48 | DeptName TEXT COMMENT '部门名称', 49 | DeptChair INT DEFAULT 0 COMMENT '部门主任ID' 50 | ) COMMENT = '部门信息表'; 51 | 52 | CREATE TABLE Faculty ( 53 | StaffID INT NOT NULL PRIMARY KEY COMMENT '员工ID', 54 | Title TEXT COMMENT '职称', 55 | Status TEXT COMMENT '状态', 56 | Tenured TINYINT(1) NOT NULL DEFAULT 0 COMMENT '终身职教职' 57 | ) COMMENT = '教职员工信息表'; 58 | 59 | CREATE TABLE Students ( 60 | StudentID INT NOT NULL PRIMARY KEY COMMENT '学生ID', 61 | StudFirstName TEXT COMMENT '学生名', 62 | StudLastName TEXT COMMENT '学生姓', 63 | StudStreetAddress TEXT COMMENT '学生街道地址', 64 | StudCity TEXT COMMENT '学生城市', 65 | StudState TEXT COMMENT '学生州', 66 | StudZipCode TEXT COMMENT '学生邮政编码', 67 | StudAreaCode TEXT COMMENT '学生区号', 68 | StudPhoneNumber TEXT COMMENT '学生电话号码', 69 | StudGPA FLOAT DEFAULT 0 COMMENT '学生GPA', 70 | StudMajor INT COMMENT '学生专业ID' 71 | ) COMMENT = '学生信息表'; 72 | 73 | CREATE TABLE Subjects ( 74 | SubjectID INT NOT NULL PRIMARY KEY COMMENT '科目ID', 75 | CategoryID VARCHAR(50) COMMENT '类别ID', 76 | SubjectCode TEXT COMMENT '科目代码', 77 | SubjectName TEXT COMMENT '科目名称', 78 | SubjectPreReq 
TEXT DEFAULT NULL COMMENT '科目先修课程',
79 |     SubjectDescription TEXT COMMENT '科目描述'
80 | ) COMMENT = '科目信息表';
81 | 
82 | CREATE TABLE Classes (
83 |     ClassID INT NOT NULL PRIMARY KEY COMMENT '课程ID',
84 |     SubjectID INT DEFAULT 0 COMMENT '科目ID',
85 |     ClassRoomID INT DEFAULT 0 COMMENT '教室ID',
86 |     Credits INT DEFAULT 0 COMMENT '学分',
87 |     StartDate DATE COMMENT '开始日期',
88 |     StartTime TIME COMMENT '开始时间',
89 |     Duration INT DEFAULT 0 COMMENT '持续时间',
90 |     MondaySchedule TINYINT(1) NOT NULL DEFAULT 0 COMMENT '周一课程安排',
91 |     TuesdaySchedule TINYINT(1) NOT NULL DEFAULT 0 COMMENT '周二课程安排',
92 |     WednesdaySchedule TINYINT(1) NOT NULL DEFAULT 0 COMMENT '周三课程安排',
93 |     ThursdaySchedule TINYINT(1) NOT NULL DEFAULT 0 COMMENT '周四课程安排',
94 |     FridaySchedule TINYINT(1) NOT NULL DEFAULT 0 COMMENT '周五课程安排',
95 |     SaturdaySchedule TINYINT(1) NOT NULL DEFAULT 0 COMMENT '周六课程安排'
96 | ) COMMENT = '课程信息表';
97 | 
98 | CREATE TABLE Faculty_Categories (
99 |     StaffID INT NOT NULL COMMENT '员工ID',
100 |     CategoryID VARCHAR(50) NOT NULL COMMENT '类别ID',
101 |     PRIMARY KEY (StaffID, CategoryID)
102 | ) COMMENT = '教职员工类别信息表';
103 | 
104 | CREATE TABLE Faculty_Subjects (
105 |     StaffID INT NOT NULL COMMENT '员工ID',
106 |     SubjectID INT NOT NULL COMMENT '科目ID',
107 |     ProficiencyRating FLOAT DEFAULT 0 COMMENT '专业能力评分',
108 |     PRIMARY KEY (StaffID, SubjectID)
109 | ) COMMENT = '教职员工科目信息表';
110 | 
111 | CREATE TABLE Faculty_Classes (
112 |     ClassID INT NOT NULL COMMENT '课程ID',
113 |     StaffID INT NOT NULL COMMENT '员工ID',
114 |     PRIMARY KEY (ClassID, StaffID)
115 | ) COMMENT = '教职员工授课信息表';
116 | 
117 | CREATE TABLE Student_Schedules (
118 |     StudentID INT NOT NULL COMMENT '学生ID',
119 |     ClassID INT NOT NULL COMMENT '课程ID',
120 |     ClassStatus INT DEFAULT 0 COMMENT '班级状态',
121 |     Grade FLOAT DEFAULT 0 COMMENT '成绩',
122 |     PRIMARY KEY (StudentID, ClassID)
123 | ) COMMENT = '学生课程安排信息表';
--------------------------------------------------------------------------------
/database/mysql/2_data_sql.sql:
--------------------------------------------------------------------------------
1 | INSERT INTO Buildings (BuildingCode, BuildingName, NumberOfFloors, ElevatorAccess, SiteParkingAvailable) VALUES ('AS', 'Arts and Sciences', 3, 1, 1);
2 | INSERT INTO Buildings (BuildingCode, BuildingName, NumberOfFloors, ElevatorAccess, SiteParkingAvailable) VALUES ('CC', 'College Center', 3, 1, 0);
3 | INSERT INTO Buildings (BuildingCode, BuildingName, NumberOfFloors, ElevatorAccess, SiteParkingAvailable) VALUES ('GYM', 'PE and Wellness', 1, 0, 1);
4 | INSERT INTO Buildings (BuildingCode, BuildingName, NumberOfFloors, ElevatorAccess, SiteParkingAvailable) VALUES ('IB', 'Instructional Building', 3, 1, 1);
5 | INSERT INTO Buildings (BuildingCode, BuildingName, NumberOfFloors, ElevatorAccess, SiteParkingAvailable) VALUES ('LB', 'Library', 2, 1, 1);
6 | INSERT INTO Buildings (BuildingCode, BuildingName, NumberOfFloors, ElevatorAccess, SiteParkingAvailable) VALUES ('TB', 'Technology Building', 2, 1, 1);
7 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('ACC', 'Accounting', 1);
8 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('ART', 'Art', 3);
9 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('BIO', 'Biology', 2);
10 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('BUS', 'Business', 1);
11 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('CHE', 'Chemistry', 2);
12 | INSERT INTO Categories (CategoryID, CategoryDescription,
DepartmentID) VALUES ('CIS', 'Computer Information Systems', 5); 13 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('CSC', 'Computer Science', 5); 14 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('ECO', 'Economics', 4); 15 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('ENG', 'English', 3); 16 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('FRE', 'French', 3); 17 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('GEG', 'Geography', 4); 18 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('GER', 'German', 3); 19 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('HIS', 'History', 4); 20 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('JRN', 'Journalism', 1); 21 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('MAT', 'Math', 2); 22 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('MUS', 'Music', 3); 23 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('PHY', 'Physics', 2); 24 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('POL', 'Political Science', 4); 25 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('PSY', 'Psychology', 4); 26 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1131, 'LB', 1); 27 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1142, 'LB', 0); 28 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1231, 'LB', 1); 29 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1514, 'AS', 1); 30 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1515, 'AS', 1); 31 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1519, 'AS', 0); 32 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1525, 'AS', 1); 33 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1530, 'AS', 1); 34 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1532, 'AS', 0); 35 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1619, 'AS', 0); 36 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1622, 'AS', 1); 37 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1624, 'AS', 0); 38 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1627, 'AS', 0); 39 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1639, 'TB', 1); 40 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1640, 'TB', 0); 41 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1641, 'TB', 0); 42 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1642, 'TB', 0); 43 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (2357, 'CC', 1); 44 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (2408, 'IB', 0); 45 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (2423, 'IB', 0); 46 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, 
WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1000, 11, 1231, 5, '2017-09-12', '10:00:00', 50, 0, 1, 1, 1, 1, 1); 47 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1002, 12, 1619, 4, '2017-09-11', '15:30:00', 110, 1, 0, 1, 0, 0, 0); 48 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1004, 13, 1627, 4, '2017-09-11', '08:00:00', 50, 1, 0, 1, 1, 1, 0); 49 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1006, 13, 1627, 4, '2017-09-11', '09:00:00', 110, 1, 0, 1, 0, 0, 0); 50 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1012, 14, 1627, 4, '2017-09-12', '13:00:00', 110, 0, 1, 0, 1, 0, 0); 51 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1020, 15, 3404, 4, '2017-09-12', '13:00:00', 110, 0, 1, 0, 1, 0, 0); 52 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1030, 16, 1231, 5, '2017-09-11', '11:00:00', 50, 1, 1, 1, 1, 1, 0); 53 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1031, 16, 1231, 5, '2017-09-11', '14:00:00', 50, 1, 1, 1, 1, 1, 0); 54 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1156, 37, 3443, 5, '2017-09-11', '16:00:00', 50, 1, 1, 1, 1, 1, 0); 55 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1162, 37, 3443, 5, '2017-09-11', '09:00:00', 140, 1, 0, 1, 0, 0, 0); 56 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1168, 37, 3445, 5, '2017-09-11', '11:00:00', 50, 1, 1, 1, 1, 1, 0); 57 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1180, 38, 3446, 5, '2017-09-11', '11:30:00', 140, 1, 0, 1, 0, 0, 0); 58 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1183, 38, 3415, 5, '2017-09-11', '13:00:00', 50, 1, 1, 1, 1, 1, 0); 59 | INSERT INTO Classes (ClassID, 
SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1184, 38, 3415, 5, '2017-09-11', '14:00:00', 50, 1, 1, 1, 1, 1, 0); 60 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1196, 39, 3415, 5, '2017-09-11', '15:00:00', 50, 1, 1, 1, 1, 1, 0); 61 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1500, 33, 1142, 5, '2017-09-11', '08:00:00', 50, 1, 1, 1, 1, 1, 0); 62 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1502, 34, 1142, 5, '2017-09-11', '09:00:00', 50, 1, 1, 1, 1, 1, 0); 63 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1560, 35, 2408, 3, '2017-09-12', '10:00:00', 50, 0, 1, 0, 1, 0, 1); 64 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1562, 36, 2408, 5, '2017-09-11', '12:00:00', 140, 1, 0, 0, 0, 1, 0); 65 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1642, 29, 1514, 2, '2017-09-12', '11:00:00', 50, 0, 1, 0, 1, 0, 0); 66 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectDescription) VALUES (1, 'ACC', 'ACC 210', 'Financial Accounting Fundamentals I', 'Introduces basic accounting concepts, principles and prodcedures for recording business transactions and developing financial accounting reports. Excel spreadsheet component.'); 67 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectPreReq, SubjectDescription) VALUES (2, 'ACC', 'ACC 220', 'Financial Accounting Fundamentals II', 'ACC 210', 'Applications of basic accounting concepts, principles and procedures to more complex business situations and to different forms of enterprise ownership. Includes computerized element. Prereq: ACC 210 or instructor permission.'); 68 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectPreReq, SubjectDescription) VALUES (3, 'ACC', 'ACC 230', 'Fundamentals of Managerial Accounting', 'ACC 220', 'Analysis of accounting data as part of the managerial process of planning, decision making and control. Concentrates on economic decision making in enterprises. Includes computerized element. Prereq: ACC 220 or instructor permission.'); 69 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectPreReq, SubjectDescription) VALUES (4, 'ACC', 'ACC 251', 'Intermediate Accounting', 'ACC 220', 'In-depth review of financial accounting principles. Emphasizes the conceptual framework of accounting, revenue and expense recognition. Accounts Receivable, Depreciation, and Amortization, etc. 
Prereq: ACC 220 or instructor permission.'); 70 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectPreReq, SubjectDescription) VALUES (5, 'ACC', 'ACC 257', 'Business Tax Accounting', 'ACC 220', 'Basic principles, practices and governmental regulations (Federal, Washington, State, and local) involved in business tax accounting including filing returns, record keeping, tax planning, and registrations and business licenses. Prereq: ACC 220 or instructors permissions.'); 71 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectDescription) VALUES (6, 'BUS', 'BUS 101', 'Introduction to Business', 'Survey of businss practices. Covers business terminology, forms of business ownership, franchising, small and international businesses, leadership and management, marketing principles, financing and investment methods, and business environment.'); 72 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectDescription) VALUES (7, 'BUS', 'BUS 155', 'Developing A Feasibility Plan', 'With the aid of a counselor, a feasibility plan will be developed which will be the basis or start of your business plan. Must be concurrently enrolled in BUS 151.'); 73 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectDescription) VALUES (8, 'BUS', 'BUS 151', 'Introduction to Entrepreneurship', 'Overview of the entrepreneurial process, examination of the marketplace, and discussion of successful business strategies. Product selection, selling and marketing strategies. Sources of information and assistance. Must be concurrently enrolled in BUS 155.'); 74 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectDescription) VALUES (9, 'BUS', 'BUS 170', 'Information Technology I', 'Uses Word for Windows word processing skills, document formatting, keyboarding, and 10-key keypad skills. Emphasis on preparing letters, memos, reports, and tables. Introduces Excel spreadsheet basics.'); 75 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectPreReq, SubjectDescription) VALUES (10, 'BUS', 'BUS 171', 'Information Technology II', 'BUS 170', 'Uses intermediate Word features including formatting and production, mail merge, macros, text columns, graphics, and fonts; Excel spreadsheet; and introduction to PowerPoint presentation software, Internet and email. Prereq: BUS 170 or permission from instructor.'); 76 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectDescription) VALUES (11, 'ART', 'ART 100', 'Introduction to Art', 'Historical backgrounds and design fundamentals which have affected art. Includes slide lectures, reading and practical studio applications.'); 77 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectDescription) VALUES (12, 'ART', 'ART 101', 'Design', 'Studio sudies in the fundamentals of two-dimensional art with problems based on line, space, texture, shape and color theories. Includes practical applications of these theories to design.'); 78 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectDescription) VALUES (13, 'ART', 'ART 111', 'Drawing', 'Study of line, value, space, perspective, and compostion through the use o charcoal, pencil, pen, and brush.'); 79 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectDescription) VALUES (14, 'ART', 'ART 201', 'Painting', 'Beginning painting in oil or synthetic media using still life. 
Emphasis on basics such as composition, value studies, color mixing, canvas preparation, and various styles and techniques. No prerequisite; some drawing background important.'); 80 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectDescription) VALUES (15, 'ART', 'ART 210', 'Computer Art', 'Explore the elements of art such as line, value, space, composition, and color through the use of the computer. Sudents will create works of art using the computer.'); 81 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectDescription) VALUES (16, 'ART', 'ART 251', 'Art History', 'Surveys major forms of visual expression from the Paleolithic, Egyptian, Mesopotamian, Greek, Roman, and Early Christian periods. Includes painting, sculpture, architecture, lectures, slides, and readings.'); 82 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectDescription) VALUES (17, 'BIO', 'BIO 100', 'Biological Principles', 'An introductory biology course with lab for the non-science major. May include maintenance of the balance between man and his environment, nutrition, genetics and inheritence, ecological principles, plant and animal diversity, and evolution.'); 83 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectDescription) VALUES (18, 'BIO', 'BIO 101', 'General Biology', 'Basic bilogical concepts with emphasis on general cell processes, plant and animal diversity, morphyology, limited reproduction, phylogeny of the living organisms, exploration of molecular genetics.'); 84 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectDescription) VALUES (19, 'BIO', 'BIO 280', 'Microbiology', 'Introduction to micro-organisms including microbial cell structure and function; metabolism; microbial genetics; and the role of micro-organisms in disease, immunity, and other selected applied areas.'); 85 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectDescription) VALUES (20, 'CHE', 'CHE 101', 'Chemistry', 'General chemistry for non-science majors. Completion of CHE 101 fulfills chemistry requirements for many health science majors.'); 86 | INSERT INTO Departments (DepartmentID, DeptName, DeptChair) VALUES (1, 'Business Administration', 98012); 87 | INSERT INTO Departments (DepartmentID, DeptName, DeptChair) VALUES (2, 'Sciences', 98010); 88 | INSERT INTO Departments (DepartmentID, DeptName, DeptChair) VALUES (3, 'Humanities', 98005); 89 | INSERT INTO Departments (DepartmentID, DeptName, DeptChair) VALUES (4, 'Social Sciences', 98059); 90 | INSERT INTO Departments (DepartmentID, DeptName, DeptChair) VALUES (5, 'Information Technology', 98007); 91 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98005, 'Suzanne', 'Viescas', '15127 NE 24th, #383', 'Redmond', 'WA', '98052', '425', '555-2686', 44000.0, '1986-05-31', 'Faculty'); 92 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98007, 'Gary', 'Hallmark', 'Route 2, Box 203B', 'Auburn', 'WA', '98002', '253', '555-2676', 53000.0, '1985-01-21', 'Faculty'); 93 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98010, 'Jeffrey', 'Smith', '30301 - 166th Ave. 
N.E.', 'Fremont', 'CA', '94538', '510', '555-2596', 52000.0, '1983-10-06', 'Faculty'); 94 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98011, 'Ann', 'Patterson', '16 Maple Lane', 'Marysville', 'WA', '98271', '253', '555-2591', 45000.0, '1983-10-16', 'Faculty'); 95 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98012, 'Robert', 'Brown', '672 Lamont Ave', 'Houston', 'TX', '77201', '713', '555-2491', 49000.0, '1989-02-09', 'Faculty'); 96 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98013, 'Deb', 'Waldal', '908 W. Capital Way', 'Tacoma', 'WA', '98413', '253', '555-2496', 44000.0, '1986-07-05', 'Faculty'); 97 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98014, 'Peter', 'Brehm', '722 Moss Bay Blvd.', 'Kirkland', 'WA', '98033', '425', '555-2501', 60000.0, '1986-07-16', 'Faculty'); 98 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98019, 'Mariya', 'Sergienko', '901 Pine Avenue', 'Portland', 'OR', '97208', '503', '555-2526', 45000.0, '1989-11-02', 'Faculty'); 99 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98020, 'Jim', 'Glynn', '13920 S.E. 40th Street', 'Bellevue', 'WA', '98009', '425', '555-2531', 45000.0, '1985-08-02', 'Faculty'); 100 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98021, 'Tim', 'Smith', '30301 - 166th Ave. N.E.', 'Seattle', 'WA', '98106', '206', '555-2536', 40000.0, '1988-12-17', 'Registrar'); 101 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98025, 'Carol', 'Viescas', '722 Moss Bay Blvd.', 'Kirkland', 'WA', '98033', '425', '555-2576', 50000.0, '1984-04-12', 'Faculty'); 102 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98028, 'Alaina', 'Hallmark', 'Route 2, Box 203B', 'Marysville', 'WA', '98270', '425', '555-2631', 57000.0, '1984-01-17', 'Faculty'); 103 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98030, 'Liz', 'Keyser', '13920 S.E. 
40th Street', 'Bellevue', 'WA', '98006', '425', '555-2556', 48000.0, '1988-05-31', 'Faculty'); 104 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98036, 'Sam', 'Abolrous', '611 Alpine Drive', 'Palm Springs', 'CA', '92263', '760', '555-2611', 60000.0, '1982-11-20', 'Faculty'); 105 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98040, 'Jim', 'Wilson', '101 NE 88th', 'Salem', 'OR', '97301', '503', '555-2636', 50000.0, '1987-01-13', 'Faculty'); 106 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98042, 'David', 'Smith', '311 20th Ave. N.E.', 'Fremont', 'CA', '94538', '510', '555-2646', 52000.0, '1991-12-17', 'Faculty'); 107 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98043, 'Kathryn', 'Patterson', '16 Maple Lane', 'Seattle', 'WA', '98115', '206', '555-2651', 25000.0, '1984-11-14', 'Secretary'); 108 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98045, 'Michael', 'Hernandez', 'PO Box 223311', 'Tacoma', 'WA', '98413', '253', '555-2711', 60000.0, '1990-08-20', 'Faculty'); 109 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98048, 'Joyce', 'Bonnicksen', '2424 Thames Drive', 'Bellevue', 'WA', '98006', '425', '555-2726', 60000.0, '1986-03-02', 'Faculty'); 110 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98052, 'Katherine', 'Ehrlich', '777 Fenexet Blvd', 'Redmond', 'WA', '98052', '425', '555-0399', 45000.0, '1985-03-08', 'Faculty'); 111 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98005, 'Professor', 'Full Time', 1); 112 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98007, 'Professor', 'Full Time', 1); 113 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98010, 'Professor', 'On Leave', 1); 114 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98011, 'Instructor', 'Full Time', 1); 115 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98012, 'Professor', 'Full Time', 1); 116 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98013, 'Instructor', 'Full Time', 1); 117 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98014, 'Associate Professor', 'Full Time', 1); 118 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98019, 'Instructor', 'Full Time', 1); 119 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98020, 'Instructor', 'Full Time', 1); 120 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98025, 'Associate Professor', 'Full Time', 1); 121 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98028, 'Professor', 'Full Time', 1); 122 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98030, 'Instructor', 'Full Time', 1); 123 | INSERT INTO Faculty (StaffID, Title, 
Status, Tenured) VALUES (98036, 'Professor', 'Full Time', 1); 124 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98040, 'Associate Professor', 'Full Time', 1); 125 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98042, 'Associate Professor', 'Full Time', 1); 126 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98045, 'Professor', 'Full Time', 1); 127 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98048, 'Professor', 'Full Time', 1); 128 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98052, 'Instructor', 'Part Time', 0); 129 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98053, 'Instructor', 'Full Time', 1); 130 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98055, 'Professor', 'Full Time', 1); 131 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98005, 'ART'); 132 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98005, 'ENG'); 133 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98005, 'MUS'); 134 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98007, 'ACC'); 135 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98007, 'ART'); 136 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98007, 'CIS'); 137 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98007, 'MAT'); 138 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98010, 'ACC'); 139 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98010, 'CIS'); 140 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98010, 'MAT'); 141 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98011, 'ENG'); 142 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98011, 'HIS'); 143 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98011, 'MAT'); 144 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98012, 'ACC'); 145 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98012, 'ECO'); 146 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98012, 'MAT'); 147 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98013, 'CIS'); 148 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98013, 'CSC'); 149 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98013, 'MAT'); 150 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98014, 'ART'); 151 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1000, 98014); 152 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1002, 98036); 153 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1004, 98019); 154 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1006, 98045); 155 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1012, 98030); 156 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1020, 98028); 157 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1030, 98036); 158 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1031, 98005); 159 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1156, 98055); 160 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1162, 98064); 161 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1168, 98055); 162 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1180, 98011); 163 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1183, 98005); 164 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1184, 98011); 165 | INSERT INTO Faculty_Classes (ClassID, 
StaffID) VALUES (1196, 98028); 166 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1500, 98028); 167 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1502, 98036); 168 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1560, 98028); 169 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1562, 98036); 170 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (2001, 98020); 171 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98005, 12, 10.0); 172 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98005, 16, 10.0); 173 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98005, 34, 9.0); 174 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98005, 36, 8.0); 175 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98005, 38, 8.0); 176 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98007, 2, 9.0); 177 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98007, 5, 10.0); 178 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98007, 14, 8.0); 179 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98007, 24, 10.0); 180 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98007, 46, 10.0); 181 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98010, 2, 8.0); 182 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98010, 5, 10.0); 183 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98010, 24, 8.0); 184 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98010, 48, 10.0); 185 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98011, 38, 9.0); 186 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98011, 43, 8.0); 187 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98011, 49, 10.0); 188 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98012, 1, 9.0); 189 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98012, 4, 10.0); 190 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98012, 32, 9.0); 191 | INSERT INTO Majors (MajorID, Major) VALUES (1, 'General Studies'); 192 | INSERT INTO Majors (MajorID, Major) VALUES (2, 'English'); 193 | INSERT INTO Majors (MajorID, Major) VALUES (3, 'Music'); 194 | INSERT INTO Majors (MajorID, Major) VALUES (4, 'Information Sciences'); 195 | INSERT INTO Majors (MajorID, Major) VALUES (5, 'Accounting'); 196 | INSERT INTO Majors (MajorID, Major) VALUES (6, 'Art'); 197 | INSERT INTO Majors (MajorID, Major) VALUES (7, 'Mathematics'); 198 | INSERT INTO Student_Class_Status (ClassStatus, ClassStatusDescription) VALUES (1, 'Enrolled'); 199 | INSERT INTO Student_Class_Status (ClassStatus, ClassStatusDescription) VALUES (2, 'Completed'); 200 | INSERT INTO Student_Class_Status (ClassStatus, ClassStatusDescription) VALUES (3, 'Withdrew'); 201 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1001, 1000, 2, 99.83); 202 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1001, 1168, 2, 70.0); 203 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1001, 2907, 2, 67.33); 204 | INSERT INTO Student_Schedules 
(StudentID, ClassID, ClassStatus, Grade) VALUES (1001, 3085, 2, 87.14); 205 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1001, 4180, 1, 0.0); 206 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1001, 5917, 1, 0.0); 207 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1001, 6082, 1, 0.0); 208 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1002, 1156, 2, 86.33); 209 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1002, 1500, 2, 85.72); 210 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1002, 2223, 3, 0.0); 211 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1002, 2889, 2, 68.22); 212 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1003, 1156, 2, 71.09); 213 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1003, 1500, 2, 89.05); 214 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1003, 1502, 2, 75.71); 215 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1003, 2911, 2, 85.39); 216 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1003, 4180, 1, 0.0); 217 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1003, 4560, 1, 0.0); 218 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1003, 6082, 1, 0.0); 219 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1004, 1162, 2, 91.44); 220 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1004, 2410, 2, 90.56); 221 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1001, 'Kerry', 'Patterson', '9877 Hacienda Drive', 'San Antonio', 'TX', '78284', '210', '555-2706', 74.465, 1); 222 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1002, 'David', 'Hamilton', '908 W. Capital Way', 'Tacoma', 'WA', '98413', '253', '555-2701', 78.755, 2); 223 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1003, 'Betsy', 'Stadick', '611 Alpine Drive', 'Palm Springs', 'CA', '92263', '760', '555-2696', 85.235, 3); 224 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1004, 'Janice', 'Galvin', '4110 Old Redmond Rd.', 'Redmond', 'WA', '98052', '425', '555-2691', 81.0, 4); 225 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1005, 'Doris', 'Hartwig', '4726 - 11th Ave. 
N.E.', 'Seattle', 'WA', '98105', '206', '555-2671', 72.225, 5); 226 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1006, 'Scott', 'Bishop', '66 Spring Valley Drive', 'Medford', 'OR', '97501', '541', '555-2666', 88.5, 6); 227 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1007, 'Elizabeth', 'Hallmark', 'Route 2, Box 203B', 'Marysville', 'WA', '98271', '253', '555-2521', 87.65, 7); 228 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1008, 'Sara', 'Sheskey', '16679 NE 41st Court', 'Portland', 'OR', '97208', '503', '555-2566', 84.625, 1); 229 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1009, 'Karen', 'Smith', '30301 - 166th Ave. N.E.', 'Eugene', 'OR', '97401', '541', '555-2551', 80.0, 2); 230 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1010, 'Marianne', 'Wier', '908 W. Capital Way', 'Tacoma', 'WA', '98413', '253', '555-2606', 83.55, 3); 231 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1011, 'John', 'Kennedy', '16679 NE 41st Court', 'Portland', 'OR', '97208', '503', '555-2621', 77.65, 4); 232 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1012, 'Sarah', 'Thompson', '2222 Springer Road', 'Lubbock', 'TX', '79402', '806', '555-2626', 89.5, 5); 233 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1013, 'Michael', 'Viescas', '15127 NE 24th, #383', 'Redmond', 'WA', '98052', '425', '555-2656', 80.25, 6); 234 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1014, 'Kendra', 'Bonnicksen', '12330 Larchlemont Lane', 'Seattle', 'WA', '98105', '206', '555-2716', 85.55, 7); 235 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1015, 'Brannon', 'Jones', '777 Fenexet Blvd', 'Long Beach', 'CA', '90809', '562', '555-0399', 86.0, 2); 236 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1016, 'Steve', 'Pundt', '2500 Rosales Lane', 'Dallas', 'TX', '75204', '972', '555-9938', 77.125, 4); 237 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1017, 'George', 'Chavez', '281 Old Navy Road', 'Marysville', 'WA', '98270', '206', '555-9930', 79.25, 6); 238 | INSERT INTO Students (StudentID, StudFirstName, 
StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1018, 'Richard', 'Lum', '754 Fourth Ave', 'Seattle', 'WA', '98115', '206', '555-2296', 72.55, 7); 239 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA) VALUES (1019, 'Daffy', 'Dumbwit', '4567 NE 32nd Ct', 'Bellevue', 'WA', '98002', '425', '555-9872', 0.0); -------------------------------------------------------------------------------- /database/postgresql/1_ddl_sql.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE Buildings ( 2 | BuildingCode TEXT NOT NULL PRIMARY KEY, 3 | BuildingName TEXT, 4 | NumberOfFloors INTEGER, 5 | ElevatorAccess BOOLEAN NOT NULL DEFAULT FALSE, 6 | SiteParkingAvailable BOOLEAN NOT NULL DEFAULT FALSE 7 | ); 8 | COMMENT ON TABLE Buildings IS '建筑物信息表'; 9 | COMMENT ON COLUMN Buildings.BuildingCode IS '建筑物代码'; 10 | COMMENT ON COLUMN Buildings.BuildingName IS '建筑物名称'; 11 | COMMENT ON COLUMN Buildings.NumberOfFloors IS '楼层数量'; 12 | COMMENT ON COLUMN Buildings.ElevatorAccess IS '电梯可用性'; 13 | COMMENT ON COLUMN Buildings.SiteParkingAvailable IS '场地停车可用性'; 14 | 15 | CREATE TABLE Categories ( 16 | CategoryID TEXT NOT NULL PRIMARY KEY, 17 | CategoryDescription TEXT, 18 | DepartmentID INTEGER DEFAULT 0 19 | ); 20 | COMMENT ON TABLE Categories IS '类别信息表'; 21 | COMMENT ON COLUMN Categories.CategoryID IS '类别ID'; 22 | COMMENT ON COLUMN Categories.CategoryDescription IS '类别描述'; 23 | COMMENT ON COLUMN Categories.DepartmentID IS '部门ID'; 24 | 25 | CREATE TABLE Majors ( 26 | MajorID INTEGER NOT NULL PRIMARY KEY, 27 | Major TEXT 28 | ); 29 | COMMENT ON TABLE Majors IS '专业信息表'; 30 | COMMENT ON COLUMN Majors.MajorID IS '专业ID'; 31 | COMMENT ON COLUMN Majors.Major IS '专业名称'; 32 | 33 | CREATE TABLE Staff ( 34 | StaffID INTEGER NOT NULL PRIMARY KEY, 35 | StfFirstName TEXT, 36 | StfLastname TEXT, 37 | StfStreetAddress TEXT, 38 | StfCity TEXT, 39 | StfState TEXT, 40 | StfZipCode TEXT, 41 | StfAreaCode TEXT, 42 | StfPhoneNumber TEXT, 43 | Salary REAL, 44 | DateHired DATE, 45 | Position TEXT 46 | ); 47 | COMMENT ON TABLE Staff IS '员工信息表'; 48 | COMMENT ON COLUMN Staff.StaffID IS '员工ID'; 49 | COMMENT ON COLUMN Staff.StfFirstName IS '员工名'; 50 | COMMENT ON COLUMN Staff.StfLastname IS '员工姓'; 51 | COMMENT ON COLUMN Staff.StfStreetAddress IS '员工街道地址'; 52 | COMMENT ON COLUMN Staff.StfCity IS '员工城市'; 53 | COMMENT ON COLUMN Staff.StfState IS '员工州'; 54 | COMMENT ON COLUMN Staff.StfZipCode IS '员工邮政编码'; 55 | COMMENT ON COLUMN Staff.StfAreaCode IS '员工区号'; 56 | COMMENT ON COLUMN Staff.StfPhoneNumber IS '员工电话号码'; 57 | COMMENT ON COLUMN Staff.Salary IS '薪资'; 58 | COMMENT ON COLUMN Staff.DateHired IS '雇佣日期'; 59 | COMMENT ON COLUMN Staff.Position IS '职位'; 60 | 61 | CREATE TABLE Student_Class_Status ( 62 | ClassStatus INTEGER NOT NULL PRIMARY KEY, 63 | ClassStatusDescription TEXT 64 | ); 65 | COMMENT ON TABLE Student_Class_Status IS '班级状态信息表'; 66 | COMMENT ON COLUMN Student_Class_Status.ClassStatus IS '班级状态ID'; 67 | COMMENT ON COLUMN Student_Class_Status.ClassStatusDescription IS '班级状态描述'; 68 | 69 | CREATE TABLE Class_Rooms ( 70 | ClassRoomID INTEGER NOT NULL PRIMARY KEY, 71 | BuildingCode TEXT, 72 | PhoneAvailable BOOLEAN NOT NULL DEFAULT FALSE 73 | ); 74 | COMMENT ON TABLE Class_Rooms IS '教室信息表'; 75 | COMMENT ON COLUMN Class_Rooms.ClassRoomID IS '教室ID'; 76 | COMMENT ON COLUMN Class_Rooms.BuildingCode IS '建筑物代码'; 77 | COMMENT ON COLUMN 
Class_Rooms.PhoneAvailable IS '电话可用性'; 78 | 79 | CREATE TABLE Departments ( 80 | DepartmentID INTEGER NOT NULL PRIMARY KEY, 81 | DeptName TEXT, 82 | DeptChair INTEGER DEFAULT 0 83 | ); 84 | COMMENT ON TABLE Departments IS '部门信息表'; 85 | COMMENT ON COLUMN Departments.DepartmentID IS '部门ID'; 86 | COMMENT ON COLUMN Departments.DeptName IS '部门名称'; 87 | COMMENT ON COLUMN Departments.DeptChair IS '部门主任ID'; 88 | 89 | CREATE TABLE Faculty ( 90 | StaffID INTEGER NOT NULL PRIMARY KEY, 91 | Title TEXT, 92 | Status TEXT, 93 | Tenured BOOLEAN NOT NULL DEFAULT FALSE 94 | ); 95 | COMMENT ON TABLE Faculty IS '教职员工信息表'; 96 | COMMENT ON COLUMN Faculty.StaffID IS '员工ID'; 97 | COMMENT ON COLUMN Faculty.Title IS '职称'; 98 | COMMENT ON COLUMN Faculty.Status IS '状态'; 99 | COMMENT ON COLUMN Faculty.Tenured IS '终身职教职'; 100 | 101 | CREATE TABLE Students ( 102 | StudentID INTEGER NOT NULL PRIMARY KEY, 103 | StudFirstName TEXT, 104 | StudLastName TEXT, 105 | StudStreetAddress TEXT, 106 | StudCity TEXT, 107 | StudState TEXT, 108 | StudZipCode TEXT, 109 | StudAreaCode TEXT, 110 | StudPhoneNumber TEXT, 111 | StudGPA REAL DEFAULT 0, 112 | StudMajor INTEGER 113 | ); 114 | COMMENT ON TABLE Students IS '学生信息表'; 115 | COMMENT ON COLUMN Students.StudentID IS '学生ID'; 116 | COMMENT ON COLUMN Students.StudFirstName IS '学生名'; 117 | COMMENT ON COLUMN Students.StudLastName IS '学生姓'; 118 | COMMENT ON COLUMN Students.StudStreetAddress IS '学生街道地址'; 119 | COMMENT ON COLUMN Students.StudCity IS '学生城市'; 120 | COMMENT ON COLUMN Students.StudState IS '学生州'; 121 | COMMENT ON COLUMN Students.StudZipCode IS '学生邮政编码'; 122 | COMMENT ON COLUMN Students.StudAreaCode IS '学生区号'; 123 | COMMENT ON COLUMN Students.StudPhoneNumber IS '学生电话号码'; 124 | COMMENT ON COLUMN Students.StudGPA IS '学生GPA'; 125 | COMMENT ON COLUMN Students.StudMajor IS '学生专业ID'; 126 | 127 | CREATE TABLE Subjects ( 128 | SubjectID INTEGER NOT NULL PRIMARY KEY, 129 | CategoryID TEXT, 130 | SubjectCode TEXT, 131 | SubjectName TEXT, 132 | SubjectPreReq TEXT DEFAULT NULL, 133 | SubjectDescription TEXT 134 | ); 135 | COMMENT ON TABLE Subjects IS '科目信息表'; 136 | COMMENT ON COLUMN Subjects.SubjectID IS '科目ID'; 137 | COMMENT ON COLUMN Subjects.CategoryID IS '类别ID'; 138 | COMMENT ON COLUMN Subjects.SubjectCode IS '科目代码'; 139 | COMMENT ON COLUMN Subjects.SubjectName IS '科目名称'; 140 | COMMENT ON COLUMN Subjects.SubjectPreReq IS '科目先修课程'; 141 | COMMENT ON COLUMN Subjects.SubjectDescription IS '科目描述'; 142 | 143 | CREATE TABLE Classes ( 144 | ClassID INTEGER NOT NULL PRIMARY KEY, 145 | SubjectID INTEGER DEFAULT 0, 146 | ClassRoomID INTEGER DEFAULT 0, 147 | Credits INTEGER DEFAULT 0, 148 | StartDate DATE, 149 | StartTime TIME, 150 | Duration INTEGER DEFAULT 0, 151 | MondaySchedule BOOLEAN NOT NULL DEFAULT FALSE, 152 | TuesdaySchedule BOOLEAN NOT NULL DEFAULT FALSE, 153 | WednesdaySchedule BOOLEAN NOT NULL DEFAULT FALSE, 154 | ThursdaySchedule BOOLEAN NOT NULL DEFAULT FALSE, 155 | FridaySchedule BOOLEAN NOT NULL DEFAULT FALSE, 156 | SaturdaySchedule BOOLEAN NOT NULL DEFAULT FALSE 157 | ); 158 | COMMENT ON TABLE Classes IS '课程信息表'; 159 | COMMENT ON COLUMN Classes.ClassID IS '课程ID'; 160 | COMMENT ON COLUMN Classes.SubjectID IS '科目ID'; 161 | COMMENT ON COLUMN Classes.ClassRoomID IS '教室ID'; 162 | COMMENT ON COLUMN Classes.Credits IS '学分'; 163 | COMMENT ON COLUMN Classes.StartDate IS '开始日期'; 164 | COMMENT ON COLUMN Classes.StartTime IS '开始时间'; 165 | COMMENT ON COLUMN Classes.Duration IS '持续时间'; 166 | COMMENT ON COLUMN Classes.MondaySchedule IS '周一课程安排'; 167 | COMMENT ON COLUMN Classes.TuesdaySchedule IS 
'周二课程安排'; 168 | COMMENT ON COLUMN Classes.WednesdaySchedule IS '周三课程安排'; 169 | COMMENT ON COLUMN Classes.ThursdaySchedule IS '周四课程安排'; 170 | COMMENT ON COLUMN Classes.FridaySchedule IS '周五课程安排'; 171 | COMMENT ON COLUMN Classes.SaturdaySchedule IS '周六课程安排'; 172 | 173 | CREATE TABLE Faculty_Categories ( 174 | StaffID INTEGER NOT NULL, 175 | CategoryID TEXT NOT NULL, 176 | PRIMARY KEY (StaffID, CategoryID) 177 | ); 178 | COMMENT ON TABLE Faculty_Categories IS '教职员工类别信息表'; 179 | COMMENT ON COLUMN Faculty_Categories.StaffID IS '员工ID'; 180 | COMMENT ON COLUMN Faculty_Categories.CategoryID IS '类别ID'; 181 | 182 | CREATE TABLE Faculty_Subjects ( 183 | StaffID INTEGER NOT NULL, 184 | SubjectID INTEGER NOT NULL, 185 | ProficiencyRating REAL DEFAULT 0, 186 | PRIMARY KEY (StaffID, SubjectID) 187 | ); 188 | COMMENT ON TABLE Faculty_Subjects IS '教职员工科目信息表'; 189 | COMMENT ON COLUMN Faculty_Subjects.StaffID IS '员工ID'; 190 | COMMENT ON COLUMN Faculty_Subjects.SubjectID IS '科目ID'; 191 | COMMENT ON COLUMN Faculty_Subjects.ProficiencyRating IS '专业能力评分'; 192 | 193 | CREATE TABLE Faculty_Classes ( 194 | ClassID INTEGER NOT NULL, 195 | StaffID INTEGER NOT NULL, 196 | PRIMARY KEY (ClassID, StaffID) 197 | ); 198 | COMMENT ON TABLE Faculty_Classes IS '教职员工授课信息表'; 199 | COMMENT ON COLUMN Faculty_Classes.ClassID IS '课程ID'; 200 | COMMENT ON COLUMN Faculty_Classes.StaffID IS '员工ID'; 201 | 202 | CREATE TABLE Student_Schedules ( 203 | StudentID INTEGER NOT NULL, 204 | ClassID INTEGER NOT NULL, 205 | ClassStatus INTEGER DEFAULT 0, 206 | Grade REAL DEFAULT 0, 207 | PRIMARY KEY (StudentID, ClassID) 208 | ); 209 | COMMENT ON TABLE Student_Schedules IS '学生课程安排信息表'; 210 | COMMENT ON COLUMN Student_Schedules.StudentID IS '学生ID'; 211 | COMMENT ON COLUMN Student_Schedules.ClassID IS '课程ID'; 212 | COMMENT ON COLUMN Student_Schedules.ClassStatus IS '班级状态'; 213 | COMMENT ON COLUMN Student_Schedules.Grade IS '成绩'; -------------------------------------------------------------------------------- /database/sqlite/music.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/database/sqlite/music.sqlite -------------------------------------------------------------------------------- /database/sqlite/school_scheduling.sqlite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/database/sqlite/school_scheduling.sqlite -------------------------------------------------------------------------------- /docs/demo_video.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/docs/demo_video.png -------------------------------------------------------------------------------- /docs/screenshot-music-database.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/docs/screenshot-music-database.png -------------------------------------------------------------------------------- /docs/screenshot-question-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/docs/screenshot-question-1.png -------------------------------------------------------------------------------- /docs/screenshot-question-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/docs/screenshot-question-2.png -------------------------------------------------------------------------------- /docs/screenshot-question-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/docs/screenshot-question-3.png -------------------------------------------------------------------------------- /docs/screenshot-question-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/docs/screenshot-question-4.png -------------------------------------------------------------------------------- /docs/screenshot-question-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/docs/screenshot-question-5.png -------------------------------------------------------------------------------- /docs/screenshot-question-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/docs/screenshot-question-6.png -------------------------------------------------------------------------------- /docs/screenshot-question-7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/docs/screenshot-question-7.png -------------------------------------------------------------------------------- /docs/screenshot-question-chinese.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/docs/screenshot-question-chinese.png -------------------------------------------------------------------------------- /docs/screenshot-question-korean.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/docs/screenshot-question-korean.png -------------------------------------------------------------------------------- /docs/screenshot-school-scheduling-database.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/docs/screenshot-school-scheduling-database.png -------------------------------------------------------------------------------- /example.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | import uuid 5 | 6 | import pandas as pd 7 | from camel.embeddings import OpenAIEmbedding 8 | from camel.models import ModelFactory 9 | 
from camel.types import ModelPlatformType, ModelType 10 | from colorama import Fore 11 | from tabulate import tabulate 12 | 13 | from camel_database_agent import DatabaseAgent 14 | from camel_database_agent.database.manager import DatabaseManager 15 | from camel_database_agent.database_base import TrainLevel 16 | 17 | # Configure logging settings to show errors on stdout 18 | logging.basicConfig( 19 | level=logging.ERROR, 20 | format="%(message)s", 21 | handlers=[logging.StreamHandler(sys.stdout)], 22 | force=True, 23 | ) 24 | # Set specific logging level for the application module 25 | logging.getLogger("camel_database_agent").setLevel(logging.INFO) 26 | logger = logging.getLogger(__name__) 27 | 28 | # Configure pandas display options to show complete data 29 | pd.set_option("display.max_rows", None) # Show all rows 30 | pd.set_option("display.max_columns", None) # Show all columns 31 | pd.set_option("display.width", None) # Auto-detect display width 32 | pd.set_option("display.max_colwidth", None) # Show full content of each cell 33 | 34 | # Define database connection string 35 | database_url = "sqlite:///database/sqlite/music.sqlite" 36 | 37 | # Initialize the database agent with required components 38 | database_agent = DatabaseAgent( 39 | interactive_mode=True, 40 | database_manager=DatabaseManager(db_url=database_url), 41 | # Configure LLM model 42 | model=ModelFactory.create( 43 | model_platform=ModelPlatformType.OPENAI, 44 | model_type=ModelType.GPT_4O_MINI, 45 | api_key=os.getenv("OPENAI_API_KEY"), 46 | url=os.getenv("OPENAI_API_BASE_URL"), 47 | ), 48 | # Configure embedding model 49 | embedding_model=OpenAIEmbedding( 50 | api_key=os.getenv("OPENAI_API_KEY"), 51 | url=os.getenv("OPENAI_API_BASE_URL"), 52 | ), 53 | ) 54 | 55 | # Train agent's knowledge about the database schema 56 | database_agent.train_knowledge( 57 | # Training level for database knowledge extraction 58 | # MEDIUM level: Balances training time and knowledge depth by: 59 | # - Analyzing schema relationships 60 | # - Extracting representative sample data 61 | # - Generating a moderate number of query examples 62 | level=TrainLevel.MEDIUM, 63 | # Whether to retrain the knowledge base from scratch 64 | # If True: Forces regeneration of all database insights and examples 65 | # If False: Uses existing cached knowledge if available 66 | reset_train=False, 67 | ) 68 | 69 | # Display database overview information 70 | print(f"{Fore.GREEN}Database Overview") 71 | print("=" * 50) 72 | print(f"{database_agent.get_summary()}\n\n{Fore.RESET}") 73 | 74 | # Display recommended example questions 75 | print(f"{Fore.GREEN}Recommendation Question") 76 | print("=" * 50) 77 | print(f"{database_agent.get_recommendation_question()}\n\n{Fore.RESET}") 78 | 79 | # Execute a sample query using natural language 80 | response = database_agent.ask( 81 | session_id=str(uuid.uuid4()), question="List all playlists with more than 5 tracks" 82 | ) 83 | 84 | # Handle and display the query results 85 | if response.success: 86 | if response.dataset is not None: 87 | # Format successful results as a table 88 | data = tabulate(tabular_data=response.dataset, headers='keys', tablefmt='psql') 89 | print(f"{Fore.GREEN}{data}{Fore.RESET}") 90 | else: 91 | print(f"{Fore.GREEN}No results found.{Fore.RESET}") 92 | # Display the SQL that was generated 93 | print(f"{Fore.YELLOW}{response.sql}{Fore.RESET}") 94 | else: 95 | # Display error message if query failed 96 | print(f"{Fore.RED}+ {response.error}{Fore.RESET}") 97 | 
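98 | # --- Illustrative extension (not part of the original example) --- 99 | # A minimal sketch of issuing a second natural-language question through the 100 | # same documented ask() API; the question text is hypothetical, and the 101 | # result handling simply mirrors the pattern used above. 102 | followup = database_agent.ask( 103 | session_id=str(uuid.uuid4()), 104 | question="Which 3 artists have the most tracks?", 105 | ) 106 | if followup.success: 107 | if followup.dataset is not None: 108 | print(f"{Fore.GREEN}{tabulate(tabular_data=followup.dataset, headers='keys', tablefmt='psql')}{Fore.RESET}") 109 | # Show the SQL that was generated for the follow-up question 110 | print(f"{Fore.YELLOW}{followup.sql}{Fore.RESET}") 111 | else: 112 | print(f"{Fore.RED}{followup.error}{Fore.RESET}")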
-------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "camel-database-agent" 7 | version = "0.2.0" 8 | description = "An integration package connecting Database and Camel" 9 | authors = [{ name = "Lei Zhang" }] 10 | requires-python = ">=3.10,<3.13" 11 | readme = "README.md" 12 | license = "Apache-2.0" 13 | dependencies = [ 14 | "camel-ai[all]>=0.2.40", 15 | "tabulate>=0.9", 16 | "colorama>=0.4", 17 | "dotenv>=0.9", 18 | "PyMySQL==1.1.1", 19 | "psycopg>=3.2" 20 | ] 21 | 22 | [dependency-groups] 23 | test = [ 24 | "pytest>=7,<8", 25 | "mock>=5,<6", 26 | "pytest-asyncio>=0.23.0,<0.24", 27 | "testcontainers>=4.9" 28 | ] 29 | dev = [ 30 | "ruff>=0.7,<0.8", 31 | "mypy>=1.5.1,<2", 32 | "pre-commit>=3,<4", 33 | "types-tabulate>=0.9", 34 | "pandas-stubs>=2.2", 35 | "uv==0.6.5", 36 | ] 37 | 38 | [project.urls] 39 | Repository = "https://github.com/coolbeevip/camel-database-agent" 40 | 41 | [tool.uv] 42 | default-groups = [ 43 | "dev", "test" 44 | ] 45 | 46 | [tool.hatch.build.targets.sdist] 47 | include = ["camel_database_agent"] 48 | 49 | [tool.hatch.build.targets.wheel] 50 | include = ["camel_database_agent"] 51 | 52 | [tool.ruff] 53 | line-length = 99 54 | fix = true 55 | target-version = "py310" 56 | 57 | [tool.ruff.format] 58 | quote-style = "preserve" 59 | 60 | [tool.ruff.lint] 61 | extend-select = [ 62 | "I", # isort 63 | "B", # flake8-bugbear 64 | "C4", # flake8-comprehensions 65 | "PGH", # pygrep-hooks 66 | "RUF", # ruff 67 | "E", 68 | ] 69 | ignore = [ 70 | "B028", # Warning without stacklevel 71 | "B904", # use 'raise ... from err' 72 | "B905", # use explicit 'strict=' parameter with 'zip()' 73 | "N818", # Exception name should be named with an Error suffix 74 | "C416", # I think comprehension is clearer https://docs.astral.sh/ruff/rules/unnecessary-comprehension/ 75 | "C408", # we have used lots of dict(...)
instead of literal 76 | ] 77 | 78 | [tool.ruff.lint.pydocstyle] 79 | convention = "google" 80 | 81 | [tool.ruff.lint.isort] 82 | known-first-party = ["camel_database_agent"] 83 | 84 | [tool.ruff.lint.per-file-ignores] 85 | "camel_database_agent/database_prompt.py" = ["E501"] 86 | "camel_database_agent/datagen/prompts.py" = ["E501"] 87 | "camel_database_agent/database/prompts.py" = ["E501"] 88 | 89 | [tool.pytest.ini_options] 90 | pythonpath = ["."] 91 | addopts = ["--strict-markers"] 92 | 93 | [tool.coverage.report] 94 | include_namespace_packages = true 95 | 96 | [tool.mypy] 97 | exclude = [ 98 | '\.venv/.*', # exclude .venv directory 99 | 'site-packages/.*', # exclude site-packages 100 | ] 101 | 102 | [[tool.mypy.overrides]] 103 | module = [ 104 | "camel.*", 105 | "colorama", 106 | "pandas", 107 | "qdrant_client.*", 108 | "tabulate" 109 | ] 110 | ignore_missing_imports = true -------------------------------------------------------------------------------- /scripts/check_imports.py: -------------------------------------------------------------------------------- 1 | import importlib.util 2 | import sys 3 | import traceback 4 | 5 | if __name__ == "__main__": 6 | files = sys.argv[1:] 7 | has_failure = False 8 | for file in files: 9 | try: 10 | # Import each file from its path; exec_module replaces the 11 | # long-deprecated SourceFileLoader.load_module(). 12 | spec = importlib.util.spec_from_file_location("x", file) 13 | assert spec is not None and spec.loader is not None 14 | module = importlib.util.module_from_spec(spec) 15 | spec.loader.exec_module(module) 16 | except Exception: 17 | has_failure = True 18 | print(file) 19 | traceback.print_exc() 20 | print() 21 | 22 | sys.exit(1 if has_failure else 0) 23 | -------------------------------------------------------------------------------- /scripts/lint_imports.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eu 4 | 5 | # Initialize a variable to keep track of errors 6 | errors=0 7 | 8 | # make sure not importing from langchain or langchain_experimental 9 | git --no-pager grep '^from langchain\.' . && errors=$((errors+1)) 10 | git --no-pager grep '^from langchain_experimental\.' . && errors=$((errors+1)) 11 | 12 | # Decide on an exit status based on the errors 13 | if [ "$errors" -gt 0 ]; then 14 | exit 1 15 | else 16 | exit 0 17 | fi -------------------------------------------------------------------------------- /spider2_lite/README.md: -------------------------------------------------------------------------------- 1 | Download local_sqlite.zip 2 | 3 | https://drive.usercontent.google.com/download?id=1coEVsCZq-Xvj9p2TnhBFoFTsY-UoYGmG&authuser=0 4 | 5 | Download spider2-lite.jsonl 6 | 7 | https://github.com/xlang-ai/Spider2/blob/main/spider2-lite/spider2-lite.jsonl -------------------------------------------------------------------------------- /spider2_lite/database/README.md: -------------------------------------------------------------------------------- 1 | Download the database file local_sqlite.zip from the following link and extract it to the current directory.
2 | 3 | https://drive.usercontent.google.com/download?id=1coEVsCZq-Xvj9p2TnhBFoFTsY-UoYGmG&authuser=0 -------------------------------------------------------------------------------- /spider2_lite/spider2_run.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | import sys 5 | 6 | from camel.models import ModelFactory 7 | from camel.types import ModelPlatformType 8 | from colorama import Fore 9 | from dotenv import load_dotenv 10 | 11 | from camel_database_agent import DatabaseAgent 12 | from camel_database_agent.database_base import TrainLevel 13 | 14 | load_dotenv() 15 | 16 | # Set up logging 17 | logging.basicConfig( 18 | level=logging.FATAL, 19 | format="%(message)s", 20 | handlers=[logging.StreamHandler(sys.stdout)], 21 | force=True, 22 | ) 23 | logging.getLogger("camel_database_agent").setLevel(logging.INFO) 24 | logger = logging.getLogger(__name__) 25 | 26 | model = ModelFactory.create( 27 | model_platform=ModelPlatformType.OPENAI_COMPATIBLE_MODEL, 28 | model_type=os.environ["MODEL_NAME"], 29 | api_key=os.environ["OPENAI_API_KEY"], 30 | url=os.environ["OPENAI_API_BASE_URL"], 31 | ) 32 | 33 | current_dir = os.path.dirname(os.path.abspath(__file__)) 34 | spider2_sqlite_database = os.path.join(current_dir, "database", "local_sqlite") 35 | spider2_workspace = os.path.join(current_dir, "workspace", os.environ["MODEL_NAME"]) 36 | os.makedirs(spider2_workspace, exist_ok=True) 37 | # Uncomment to rebuild the workspace from scratch: 38 | # import shutil 39 | # shutil.rmtree(spider2_workspace) 40 | # os.makedirs(spider2_workspace) 41 | 42 | 43 | with open(os.path.join(current_dir, "spider2-lite.jsonl"), "r") as f: 44 | examples = [json.loads(line) for line in f] 45 | examples = [example for example in examples if example["instance_id"].startswith("local")] 46 | examples = sorted(examples, key=lambda x: x["db"]) 47 | 48 | database_agent_cache = {} 49 | success = 0 50 | failed = 0 51 | total = len(examples) 52 | for example in examples: 53 | try: 54 | instance_id = example["instance_id"] 55 | db_id = example["db"] 56 | question = example["question"] 57 | db_path = os.path.join(spider2_sqlite_database, f"{db_id}.sqlite") 58 | if not os.path.exists(db_path): 59 | print(f"{Fore.RED}database {db_id} not found{Fore.RESET}") 60 | continue 61 | if db_id not in database_agent_cache: 62 | database_agent_cache[db_id] = database_agent = DatabaseAgent( 63 | db_url=f"sqlite:///{db_path}", 64 | model=model, 65 | data_path=os.path.join(str(spider2_workspace), db_id), 66 | ) 67 | database_agent.train_knowledge( 68 | level=TrainLevel.LOW, 69 | reset_train=False, 70 | ) 71 | database_agent = database_agent_cache[db_id] 72 | response = database_agent.ask( 73 | session_id=instance_id, 74 | question=question, 75 | ) 76 | 77 | example["sql"] = response.sql 78 | if response.success: 79 | example["result"] = True 80 | success += 1 81 | else: 82 | example["result"] = False 83 | failed += 1 84 | except Exception as e: 85 | print(f"{Fore.RED}{e!s}{Fore.RESET}") 86 | finally: 87 | print(f"process {success + failed}/{total}") 88 | 89 | with open(os.path.join(spider2_workspace, "spider2-lite-result.jsonl"), "a") as f: 90 | f.write(json.dumps(example) + "\n") 91 | 92 | print( 93 | f"success: {success}, failed: {failed}, total: {total}", 94 | ) 95 | -------------------------------------------------------------------------------- /tests/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/tests/__init__.py -------------------------------------------------------------------------------- /tests/integration_tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/tests/integration_tests/__init__.py -------------------------------------------------------------------------------- /tests/integration_tests/data.sql: -------------------------------------------------------------------------------- 1 | -- 插入用户数据 2 | INSERT INTO `user` (`username`, `password_hash`, `mobile`, `email`) VALUES 3 | ('张三', 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855', '13800138000', 'john@example.com'), 4 | ('李四', '5e884898da28047151d0e56f8dc6292773603d0d6aabbdd62a11ef721d1542d8', '13912345678', 'jane@test.com'), 5 | ('王五', '6cf615d5bcaac778352a8f1f3360d23f02f34ec182e259897fd6ce485d7870d4', '13698765432', NULL); 6 | 7 | -- 插入商品数据 8 | INSERT INTO `product` (`product_name`, `description`, `price`, `stock`) VALUES 9 | ('iPhone 15', '6.1英寸 128GB 黑色', 6999.00, 100), 10 | ('小米电视65寸', '4K超高清智能电视', 3299.00, 50), 11 | ('华为MateBook X', '13英寸轻薄笔记本', 8999.00, 30), 12 | ('耐克运动鞋', 'Air Max 270 男款', 899.00, 200), 13 | ('《深入浅出MySQL》', '数据库技术书籍', 99.90, 500); 14 | 15 | -- 插入用户地址(每个用户1个地址) 16 | INSERT INTO `user_address` (`user_id`, `receiver`, `phone`, `province`, `city`, `district`, `detail_address`) VALUES 17 | (1, '张三', '13800138000', '北京市', '市辖区', '朝阳区', '建国路88号'), 18 | (2, '李四', '13912345678', '上海市', '市辖区', '浦东新区', '陆家嘴环路100号'), 19 | (3, '王五', '13698765432', '广东省', '深圳市', '南山区', '科技园路1号'); 20 | 21 | -- 插入订单数据(3个订单),总金额与订单明细保持一致 22 | INSERT INTO `order_info` (`user_id`, `order_no`, `total_amount`, `order_status`, `payment_method`, `address_id`, `payment_time`) VALUES 23 | (1, 'ORDER202311011001', 13998.00, 1, 'ALIPAY', 1, '2023-11-01 10:05:00'), 24 | (2, 'ORDER202311021002', 3498.80, 1, 'WECHAT', 2, '2023-11-02 14:30:00'), 25 | (3, 'ORDER202311031003', 1798.00, 0, NULL, 3, NULL); 26 | 27 | -- 插入订单明细 28 | INSERT INTO `order_item` (`order_id`, `product_id`, `quantity`, `unit_price`) VALUES 29 | -- 订单1(购买2个iPhone) 30 | (1, 1, 2, 6999.00), 31 | -- 订单2(1台电视+2本书) 32 | (2, 2, 1, 3299.00), 33 | (2, 5, 2, 99.90), 34 | -- 订单3(2双运动鞋) 35 | (3, 4, 2, 899.00); 36 | 37 | -- 插入支付信息(已完成支付的订单) 38 | INSERT INTO `payment_info` (`order_id`, `transaction_no`, `payment_status`, `payment_amount`, `payment_method`, `payment_time`) VALUES 39 | (1, 'PAY20231101123456', 'SUCCESS', 13998.00, 'ALIPAY', '2023-11-01 10:05:00'), 40 | (2, 'PAY20231102567890', 'SUCCESS', 3498.80, 'WECHAT', '2023-11-02 14:30:00'); -------------------------------------------------------------------------------- /tests/integration_tests/ddl.sql: -------------------------------------------------------------------------------- 1 | -- 用户表 2 | CREATE TABLE user ( 3 | user_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, -- 用户ID 4 | username TEXT NOT NULL UNIQUE, -- 用户名 5 | password_hash TEXT NOT NULL, -- 密码哈希值 6 | mobile TEXT NOT NULL UNIQUE, -- 手机号 7 | email TEXT, -- 邮箱 8 | created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, -- 创建时间 9 | updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP -- 更新时间 10 | ); -- 用户信息表 11 | 12 | -- 商品表 13 | CREATE TABLE product ( 14 | product_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, -- 商品ID 15 | product_name TEXT NOT NULL, -- 商品名称 16 | description TEXT, -- 商品描述 17 | price REAL
NOT NULL, -- 商品单价 18 | stock INTEGER NOT NULL DEFAULT 0, -- 库存数量 19 | status INTEGER NOT NULL DEFAULT 1, -- 状态(1:上架 0:下架) 20 | created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, -- 创建时间 21 | updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP -- 更新时间 22 | ); -- 商品信息表 23 | 24 | -- 订单表 25 | CREATE TABLE order_info ( 26 | order_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, -- 订单ID 27 | user_id INTEGER NOT NULL, -- 用户ID 28 | order_no TEXT NOT NULL UNIQUE, -- 订单编号(业务唯一) 29 | total_amount REAL NOT NULL, -- 订单总金额 30 | order_status INTEGER NOT NULL DEFAULT 0, -- 订单状态(0:待支付 1:已支付 2:已发货 3:已完成 4:已取消) 31 | payment_method TEXT, -- 支付方式 32 | address_id INTEGER NOT NULL, -- 收货地址ID 33 | payment_time DATETIME, -- 支付时间 34 | created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, -- 创建时间 35 | updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, -- 更新时间 36 | FOREIGN KEY (user_id) REFERENCES user (user_id) ON DELETE RESTRICT ON UPDATE CASCADE, 37 | FOREIGN KEY (address_id) REFERENCES user_address (address_id) ON DELETE RESTRICT ON UPDATE CASCADE 38 | ); -- 订单主表 39 | 40 | -- 订单明细表 41 | CREATE TABLE order_item ( 42 | item_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, -- 明细ID 43 | order_id INTEGER NOT NULL, -- 订单ID 44 | product_id INTEGER NOT NULL, -- 商品ID 45 | quantity INTEGER NOT NULL, -- 购买数量 46 | unit_price REAL NOT NULL, -- 成交单价 47 | total_price REAL AS (quantity * unit_price) STORED, -- 明细总价 48 | FOREIGN KEY (order_id) REFERENCES order_info (order_id) ON DELETE CASCADE ON UPDATE CASCADE, 49 | FOREIGN KEY (product_id) REFERENCES product (product_id) ON DELETE RESTRICT ON UPDATE CASCADE 50 | ); -- 订单明细表 51 | 52 | -- 用户地址表 53 | CREATE TABLE user_address ( 54 | address_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, -- 地址ID 55 | user_id INTEGER NOT NULL, -- 用户ID 56 | receiver TEXT NOT NULL, -- 收货人 57 | phone TEXT NOT NULL, -- 联系电话 58 | province TEXT NOT NULL, -- 省 59 | city TEXT NOT NULL, -- 市 60 | district TEXT NOT NULL, -- 区 61 | detail_address TEXT NOT NULL, -- 详细地址 62 | is_default INTEGER NOT NULL DEFAULT 0, -- 是否默认地址 63 | created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, -- 创建时间 64 | updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, -- 更新时间 65 | FOREIGN KEY (user_id) REFERENCES user (user_id) ON DELETE CASCADE ON UPDATE CASCADE 66 | ); -- 用户地址表 67 | 68 | -- 支付信息表 69 | CREATE TABLE payment_info ( 70 | payment_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, -- 支付ID 71 | order_id INTEGER NOT NULL, -- 订单ID 72 | transaction_no TEXT NOT NULL UNIQUE, -- 第三方交易号 73 | payment_status TEXT NOT NULL, -- 支付状态 74 | payment_amount REAL NOT NULL, -- 实际支付金额 75 | payment_method TEXT NOT NULL, -- 支付方式 76 | payment_time DATETIME NOT NULL, -- 支付时间 77 | created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, -- 创建时间 78 | FOREIGN KEY (order_id) REFERENCES order_info (order_id) ON DELETE RESTRICT ON UPDATE CASCADE 79 | ); -- 支付信息表 -------------------------------------------------------------------------------- /tests/integration_tests/query.md: -------------------------------------------------------------------------------- 1 | ## 请列出所有已支付订单的详细信息。 2 | SELECT o.order_no, o.total_amount, p.payment_status FROM order_info o JOIN payment_info p ON o.order_id = p.order_id WHERE o.order_status = 1; 3 | 4 | ## 哪些用户在最近一个月内下过订单? 
5 | SELECT DISTINCT u.username FROM user u JOIN order_info o ON u.user_id = o.user_id WHERE o.created_at >= DATE('now', '-1 month'); 6 | 7 | ## 我想知道每种支付方式的订单数量。 8 | SQL查询: SELECT payment_method, COUNT(*) AS order_count FROM order_info GROUP BY payment_method; -------------------------------------------------------------------------------- /tests/integration_tests/test_database_agent.py: -------------------------------------------------------------------------------- 1 | import os 2 | import uuid 3 | from unittest import TestCase 4 | 5 | from camel.models import ModelFactory 6 | from camel.types import ModelPlatformType, ModelType 7 | from dotenv import load_dotenv 8 | 9 | from camel_database_agent import DatabaseAgent 10 | from camel_database_agent.database.manager import DatabaseManager 11 | from camel_database_agent.database_base import MessageLogToEmpty 12 | 13 | load_dotenv("../../.env") # isort:skip 14 | 15 | 16 | class TestDatabaseAgent(TestCase): 17 | database_manager: DatabaseManager 18 | 19 | @classmethod 20 | def setUpClass(cls) -> None: 21 | current_dir = os.path.dirname(os.path.abspath(__file__)) 22 | cls.database_manager = DatabaseManager(db_url="sqlite:///:memory:") 23 | with open(os.path.join(current_dir, "ddl.sql"), "r") as f: 24 | cls.database_manager.execute(f.read(), ignore_sql_check=True) 25 | with open(os.path.join(current_dir, "data.sql"), "r") as f: 26 | cls.database_manager.execute(f.read(), ignore_sql_check=True) 27 | 28 | def test_ask(self) -> None: 29 | database_agent = DatabaseAgent( 30 | database_manager=self.database_manager, 31 | model=ModelFactory.create( 32 | model_platform=ModelPlatformType.DEFAULT, 33 | model_type=ModelType.DEFAULT, 34 | ), 35 | language="Chinese", 36 | ) 37 | 38 | database_agent.train_knowledge(reset_train=True) 39 | database_agent.ask( 40 | session_id=str(uuid.uuid4()), 41 | question="查询最近30天内成功支付用户的订单详情,包括用户信息、订单信息、支付状态、订单明细以及收货地址", 42 | message_log=MessageLogToEmpty(), 43 | ) 44 | -------------------------------------------------------------------------------- /tests/integration_tests/test_database_knowledge.py: -------------------------------------------------------------------------------- 1 | import os 2 | from unittest import TestCase 3 | 4 | from camel.embeddings import OpenAIEmbedding 5 | from camel.models import ModelFactory 6 | from camel.types import ModelPlatformType, ModelType 7 | from dotenv import load_dotenv 8 | 9 | from camel_database_agent import DataQueryInferencePipeline 10 | from camel_database_agent.database.manager import DatabaseManager 11 | from camel_database_agent.database.schema import ( 12 | DatabaseSchemaParse, 13 | ) 14 | from camel_database_agent.knowledge.knowledge import DatabaseKnowledge 15 | from camel_database_agent.knowledge.knowledge_qdrant import ( 16 | DatabaseKnowledgeQdrant, 17 | ) 18 | 19 | load_dotenv("../../.env") # isort:skip 20 | 21 | 22 | class TestDatabaseKnowledge(TestCase): 23 | parse: DatabaseSchemaParse 24 | knowledge: DatabaseKnowledge 25 | database_manager: DatabaseManager 26 | 27 | @classmethod 28 | def setUpClass(cls) -> None: 29 | model = ModelFactory.create( 30 | model_platform=ModelPlatformType.DEFAULT, 31 | model_type=ModelType.DEFAULT, 32 | ) 33 | cls.database_manager = DatabaseManager(db_url="sqlite:///:memory:") 34 | cls.parse = DatabaseSchemaParse( 35 | database_manager=cls.database_manager, 36 | model=model, 37 | ) 38 | cls.knowledge = DatabaseKnowledgeQdrant(embedding=OpenAIEmbedding(), model=model) 39 | 40 | def test_qdrant_with_ddl(self) -> None: 41 | current_dir = 
os.path.dirname(os.path.abspath(__file__)) 42 | with open(os.path.join(current_dir, "ddl.sql"), "r") as f: 43 | self.knowledge.add(self.parse.parse_ddl_record(f.read()).data) 44 | 45 | ddl_records = self.knowledge.query_ddl("查询用户表中的所有用户信息", top_k=2) 46 | assert len(ddl_records) == 2 47 | 48 | def test_qdrant_with_query(self) -> None: 49 | current_dir = os.path.dirname(os.path.abspath(__file__)) 50 | with open(os.path.join(current_dir, "ddl.sql"), "r") as f: 51 | ddl_sql = f.read() 52 | self.database_manager.execute(ddl_sql, ignore_sql_check=True) 53 | with open(os.path.join(current_dir, "data.sql"), "r") as f: 54 | data_sql = f.read() 55 | self.database_manager.execute(data_sql, ignore_sql_check=True) 56 | 57 | pipeline = DataQueryInferencePipeline( 58 | ddl_sql=ddl_sql, 59 | data_sql=data_sql, 60 | model=ModelFactory.create( 61 | model_platform=ModelPlatformType.DEFAULT, 62 | model_type=ModelType.DEFAULT, 63 | ), 64 | database_manager=self.database_manager, 65 | ) 66 | query_records = pipeline.generate(10).data 67 | self.knowledge.add(records=query_records) 68 | assert len(self.knowledge.get_query_collection_sample(5)) == 5 69 | -------------------------------------------------------------------------------- /tests/integration_tests/test_database_manager.py: -------------------------------------------------------------------------------- 1 | import os 2 | from unittest import TestCase 3 | 4 | from dotenv import load_dotenv 5 | 6 | from camel_database_agent.database.manager import DatabaseManager, SQLExecutionError 7 | 8 | load_dotenv() # isort:skip 9 | 10 | 11 | class TestDatabaseManager(TestCase): 12 | database_manager: DatabaseManager 13 | 14 | @classmethod 15 | def setUpClass(cls) -> None: 16 | cls.database_manager = DatabaseManager(db_url="sqlite:///:memory:") 17 | current_dir = os.path.dirname(os.path.abspath(__file__)) 18 | with open(os.path.join(current_dir, "ddl.sql"), "r") as f: 19 | cls.database_manager.execute(f.read(), ignore_sql_check=True) 20 | with open(os.path.join(current_dir, "data.sql"), "r") as f: 21 | cls.database_manager.execute(f.read(), ignore_sql_check=True) 22 | 23 | def test_select_with_fail(self) -> None: 24 | sql = "select * from no_exist_table" 25 | with self.assertRaises(SQLExecutionError) as context: 26 | self.database_manager.select(sql) 27 | assert context.exception.sql == sql 28 | -------------------------------------------------------------------------------- /tests/integration_tests/test_database_schema_dialect.py: -------------------------------------------------------------------------------- 1 | import os 2 | from unittest import TestCase 3 | 4 | from camel.models import ModelFactory 5 | from camel.types import ModelPlatformType, ModelType 6 | from colorama import Fore 7 | from dotenv import load_dotenv 8 | 9 | from camel_database_agent.database.dialect.dialect import ( 10 | DatabaseSchemaDialect, 11 | ) 12 | from camel_database_agent.database.dialect.dialect_sqlite import ( 13 | DatabaseSchemaDialectSqlite, 14 | ) 15 | from camel_database_agent.database.manager import DatabaseManager 16 | 17 | load_dotenv("../../.env") # isort:skip 18 | 19 | 20 | class TestDatabaseSchemaDialect(TestCase): 21 | dialect: DatabaseSchemaDialect 22 | 23 | @classmethod 24 | def setUpClass(cls) -> None: 25 | model = ModelFactory.create( 26 | model_platform=ModelPlatformType.DEFAULT, 27 | model_type=ModelType.DEFAULT, 28 | ) 29 | database_manager = DatabaseManager(db_url="sqlite:///:memory:") 30 | current_dir = os.path.dirname(os.path.abspath(__file__)) 31 | with 
open(os.path.join(current_dir, "ddl.sql"), "r") as f: 32 | database_manager.execute(f.read(), ignore_sql_check=True) 33 | with open(os.path.join(current_dir, "data.sql"), "r") as f: 34 | database_manager.execute(f.read(), ignore_sql_check=True) 35 | 36 | DatabaseSchemaDialect.register(DatabaseSchemaDialectSqlite) 37 | 38 | cls.dialect = DatabaseSchemaDialect.get_dialect( 39 | dialect_name=database_manager.dialect_name(), 40 | database_manager=database_manager, 41 | model=model, 42 | ) 43 | 44 | def test_get_schema(self) -> None: 45 | ddl_sql = self.dialect.get_schema() 46 | print(Fore.GREEN + ddl_sql) 47 | assert ddl_sql is not None 48 | 49 | polished_ddl_sql = self.dialect.get_polished_schema() 50 | print(Fore.BLUE + polished_ddl_sql) 51 | assert polished_ddl_sql is not None 52 | assert ddl_sql != polished_ddl_sql 53 | 54 | def test_sampled_data(self) -> None: 55 | sample_sql = self.dialect.get_sampled_data() 56 | print(Fore.GREEN + sample_sql) 57 | assert sample_sql is not None 58 | -------------------------------------------------------------------------------- /tests/integration_tests/test_database_schema_parse.py: -------------------------------------------------------------------------------- 1 | import os 2 | from unittest import TestCase 3 | 4 | from camel.models import ModelFactory 5 | from camel.types import ModelPlatformType, ModelType 6 | from dotenv import load_dotenv 7 | 8 | from camel_database_agent.database.manager import DatabaseManager 9 | from camel_database_agent.database.schema import ( 10 | DatabaseSchemaParse, 11 | SchemaParseResponse, 12 | ) 13 | 14 | load_dotenv("../../.env") # isort:skip 15 | 16 | 17 | class TestDatabaseSchemaParse(TestCase): 18 | parse: DatabaseSchemaParse 19 | 20 | @classmethod 21 | def setUpClass(cls) -> None: 22 | model = ModelFactory.create( 23 | model_platform=ModelPlatformType.DEFAULT, 24 | model_type=ModelType.DEFAULT, 25 | ) 26 | cls.parse = DatabaseSchemaParse( 27 | database_manager=DatabaseManager(db_url="sqlite:///:memory:"), 28 | model=model, 29 | ) 30 | 31 | def test_parse_ddl_record(self) -> None: 32 | current_dir = os.path.dirname(os.path.abspath(__file__)) 33 | with open(os.path.join(current_dir, "ddl.sql"), "r") as f: 34 | schema_parse_response: SchemaParseResponse = self.parse.parse_ddl_record(f.read()) 35 | assert len(schema_parse_response.data) == 6 36 | 37 | def test_parse_dml_record(self) -> None: 38 | current_dir = os.path.dirname(os.path.abspath(__file__)) 39 | with open(os.path.join(current_dir, "data.sql"), "r") as f: 40 | schema_parse_response: SchemaParseResponse = self.parse.parse_sampled_record(f.read()) 41 | assert len(schema_parse_response.data) == 6 42 | 43 | def test_parse_query_record(self) -> None: 44 | current_dir = os.path.dirname(os.path.abspath(__file__)) 45 | with open(os.path.join(current_dir, "query.md"), "r") as f: 46 | schema_parse_response: SchemaParseResponse = self.parse.parse_query_record(f.read()) 47 | assert len(schema_parse_response.data) == 3 48 | -------------------------------------------------------------------------------- /tests/integration_tests/test_sql_query_inference_pipeline.py: -------------------------------------------------------------------------------- 1 | import os 2 | from unittest import TestCase 3 | 4 | from camel.models import ModelFactory 5 | from camel.types import ModelPlatformType, ModelType 6 | from dotenv import load_dotenv 7 | 8 | from camel_database_agent import DataQueryInferencePipeline 9 | from camel_database_agent.database.manager import DatabaseManager 10 | 11 
| load_dotenv("../../.env") # isort:skip 12 | 13 | 14 | class TestDataQueryInferencePipeline(TestCase): 15 | pipeline: DataQueryInferencePipeline 16 | 17 | @classmethod 18 | def setUpClass(cls) -> None: 19 | database_manager = DatabaseManager(db_url="sqlite:///:memory:") 20 | current_dir = os.path.dirname(os.path.abspath(__file__)) 21 | with open(os.path.join(current_dir, "ddl.sql"), "r") as f: 22 | ddl_sql = f.read() 23 | database_manager.execute(ddl_sql, ignore_sql_check=True) 24 | with open(os.path.join(current_dir, "data.sql"), "r") as f: 25 | data_sql = f.read() 26 | database_manager.execute(data_sql, ignore_sql_check=True) 27 | 28 | cls.pipeline = DataQueryInferencePipeline( 29 | ddl_sql=ddl_sql, 30 | data_sql=data_sql, 31 | model=ModelFactory.create( 32 | model_platform=ModelPlatformType.DEFAULT, 33 | model_type=ModelType.DEFAULT, 34 | ), 35 | database_manager=database_manager, 36 | ) 37 | 38 | def test_generate(self) -> None: 39 | schema_response = self.pipeline.generate(10) 40 | assert len(schema_response.data) == 10 41 | -------------------------------------------------------------------------------- /tests/unit_tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/tests/unit_tests/__init__.py --------------------------------------------------------------------------------