├── .github
└── workflows
│ ├── pr.yml
│ └── release.yml
├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── LICENSE_HEADER.tpl
├── Makefile
├── README.md
├── add_license_headers.sh
├── camel_database_agent
├── __init__.py
├── cli.py
├── core
│ ├── __init__.py
│ ├── exceptions.py
│ └── method_lru_cache.py
├── database
│ ├── __init__.py
│ ├── dialect
│ │ ├── __init__.py
│ │ ├── dialect.py
│ │ ├── dialect_mysql.py
│ │ ├── dialect_postgresql.py
│ │ └── dialect_sqlite.py
│ ├── manager.py
│ ├── prompts.py
│ └── schema.py
├── database_agent.py
├── database_base.py
├── database_prompt.py
├── datagen
│ ├── __init__.py
│ ├── pipeline.py
│ └── prompts.py
├── knowledge
│ ├── __init__.py
│ ├── knowledge.py
│ └── knowledge_qdrant.py
└── py.typed
├── conftest.py
├── database
├── mysql
│ ├── 1_ddl_sql.sql
│ └── 2_data_sql.sql
├── postgresql
│ ├── 1_ddl_sql.sql
│ └── 2_data_sql.sql
└── sqlite
│ ├── music.sqlite
│ └── school_scheduling.sqlite
├── docs
├── demo_video.png
├── screenshot-music-database.png
├── screenshot-question-1.png
├── screenshot-question-2.png
├── screenshot-question-3.png
├── screenshot-question-4.png
├── screenshot-question-5.png
├── screenshot-question-6.png
├── screenshot-question-7.png
├── screenshot-question-chinese.png
├── screenshot-question-korean.png
└── screenshot-school-scheduling-database.png
├── example.py
├── pyproject.toml
├── scripts
├── check_imports.py
└── lint_imports.sh
├── spider2_lite
├── README.md
├── database
│ └── README.md
├── spider2-lite-back.jsonl
├── spider2-lite.jsonl
└── spider2_run.py
├── tests
├── __init__.py
├── integration_tests
│ ├── __init__.py
│ ├── data.sql
│ ├── ddl.sql
│ ├── query.md
│ ├── test_database_agent.py
│ ├── test_database_knowledge.py
│ ├── test_database_manager.py
│ ├── test_database_schema_dialect.py
│ ├── test_database_schema_parse.py
│ └── test_sql_query_inference_pipeline.py
└── unit_tests
│ └── __init__.py
└── uv.lock
/.github/workflows/pr.yml:
--------------------------------------------------------------------------------
1 | name: Python application
2 |
3 | on:
4 | push:
5 | branches: ["main"]
6 | pull_request:
7 | branches: ["main"]
8 |
9 | permissions:
10 | contents: read
11 |
12 | jobs:
13 | lint-and-test:
14 | runs-on: ubuntu-latest
15 | env:
16 | OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
17 | steps:
18 | - uses: actions/checkout@v4
19 |
20 | - name: Set up Python 3.10
21 | uses: actions/setup-python@v4
22 | with:
23 | python-version: "3.10"
24 | cache: 'pip'
25 |
26 | - name: Cache virtual environment
27 | uses: actions/cache@v3
28 | with:
29 | path: .venv
30 | key: venv-${{ runner.os }}-python-${{ hashFiles('pyproject.toml') }}-v2
31 | restore-keys: |
32 | venv-${{ runner.os }}-python-v2
33 |
34 | - name: Install dependencies
35 | run: |
36 | python -m pip install --upgrade pip
37 | pip install uv ruff mypy
38 | uv venv .venv --python=3.10
39 | uv sync --all-extras
40 |
41 | - name: Lint
42 | run: |
43 | source .venv/bin/activate
44 | make lint
45 |
46 | - name: Run tests
47 | run: |
48 | source .venv/bin/activate
49 | make tests
--------------------------------------------------------------------------------
/.github/workflows/release.yml:
--------------------------------------------------------------------------------
1 | name: Release
2 |
3 | on:
4 | release:
5 | types: [published]
6 |
7 | permissions:
8 | contents: read
9 |
10 | jobs:
11 | deploy:
12 |
13 | runs-on: ubuntu-latest
14 |
15 | steps:
16 | - uses: actions/checkout@v3
17 | - name: Set up Python
18 | uses: actions/setup-python@v3
19 | with:
20 | python-version: '3.10'
21 | - name: Install dependencies
22 | run: |
23 | python -m pip install --upgrade pip
24 | pip install build
25 | - name: Build package
26 | run: python -m build
27 | - name: Publish package
28 | uses: pypa/gh-action-pypi-publish@release/v1
29 | with:
30 | user: __token__
31 | password: ${{ secrets.PYPI_API_TOKEN }}
32 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .env
2 | .env_*
3 | .idea
4 | __pycache__
5 | .DS_Store
6 | .mypy_cache
7 | .mypy_cache_*
8 | .venv
9 |
10 | spider2_lite/database/local_sqlite
11 | spider2_lite/workspace
12 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | - repo: https://github.com/astral-sh/ruff-pre-commit
3 | rev: 'v0.7.4'
4 | hooks:
5 | - id: ruff
6 | args: [--fix, --exit-non-zero-on-fix, --show-fixes]
7 | types: [python]
8 | - id: ruff-format
9 | types: [python]
10 | - repo: local
11 | hooks:
12 | - id: mypy
13 | name: Check mypy
14 | entry: mypy --namespace-packages -p camel_database_agent -p tests
15 | language: python
16 | types: [python]
17 | pass_filenames: false
18 | require_serial: true
19 | always_run: true
20 | verbose: true
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
--------------------------------------------------------------------------------
/LICENSE_HEADER.tpl:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Lei Zhang
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: all format lint test tests integration_tests docker_tests help extended_tests
2 |
3 | # Default target executed when no arguments are given to make.
4 | all: help
5 |
6 | ######################
7 | # LINTING AND FORMATTING
8 | ######################
9 |
10 | # Define a variable for Python and notebook files.
11 | PYTHON_FILES=.
12 | MYPY_CACHE=.mypy_cache
13 | lint format: PYTHON_FILES=.
14 | lint_package: PYTHON_FILES=camel_database_agent
15 | lint_tests: PYTHON_FILES=tests
16 | lint_tests: MYPY_CACHE=.mypy_cache_test
17 |
18 | lint lint_diff lint_package lint_tests:
19 | ruff -V
20 | mypy -V
21 | [ "$(PYTHON_FILES)" = "" ] || ruff check $(PYTHON_FILES)
22 | [ "$(PYTHON_FILES)" = "" ] || ruff format $(PYTHON_FILES) --diff
23 | [ "$(PYTHON_FILES)" = "" ] || mkdir -p $(MYPY_CACHE) && mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE)
24 |
25 | format format_diff:
26 | [ "$(PYTHON_FILES)" = "" ] || ruff format $(PYTHON_FILES)
27 | [ "$(PYTHON_FILES)" = "" ] || ruff check --select I --fix $(PYTHON_FILES)
28 |
29 | spell_check:
30 | codespell --toml pyproject.toml
31 |
32 | spell_fix:
33 | codespell --toml pyproject.toml -w
34 |
35 | check_imports: $(shell find camel_database_agent -name '*.py')
36 | python ./scripts/check_imports.py $^
37 |
38 | test tests:
39 | pytest tests
40 |
41 | ######################
42 | # HELP
43 | ######################
44 |
45 | help:
46 | @echo '----'
47 | @echo 'check_imports - check imports'
48 | @echo 'format - run code formatters'
49 | @echo 'lint - run linters'
50 | @echo 'tests - run unit tests'
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CAMEL DatabaseAgent
2 |
3 | [](https://opensource.org/licenses/Apache-2.0)
4 | [](https://pypi.org/project/camel-database-agent/)
5 | [](https://github.com/coolbeevip/camel-database-agent/actions/workflows/pr.yml)
6 | [](https://pypi.org/project/camel-database-agent/)
7 |
8 | An open-source toolkit helping developers build natural language database query solutions based on [CAMEL-AI](https://github.com/camel-ai/camel).
9 |
10 | ## Core Components
11 |
12 | - **DataQueryInferencePipeline**: A pipeline that transforms database schema and sample data into query few-shot examples (questions and corresponding SQL)
13 | - **DatabaseKnowledge**: A vector database storing database schema, sample data, and query few-shot examples
14 | - **DatabaseAgent**: An intelligent agent based on the CAMEL framework that utilizes DatabaseKnowledge to answer user questions
15 |
16 | Features:
17 |
18 | - [x] Read-Only mode
19 | - [x] SQLite
20 | - [x] MySQL
21 | - [x] PostgreSQL
22 | - [ ] Spider 2.0-Lite evaluation (planned)
23 |
24 | ## Quick Start
25 |
26 | Clone the repository and install the dependencies.
27 |
28 | ```shell
29 | git clone git@github.com:coolbeevip/camel-database-agent.git
30 | cd camel-database-agent
31 | pip install uv ruff mypy
32 | uv venv .venv --python=3.10
33 | source .venv/bin/activate
34 | uv sync --all-extras
35 | ```
36 |
37 | #### Music Database
38 |
39 | > This database serves as a comprehensive data model for a digital music distribution platform, encompassing various aspects of artist management, customer interactions, and sales transactions.
40 |
41 | Connect to `database/sqlite/music.sqlite` database and use `openai` API to answer questions.
42 |
43 | **NOTE: The first connection will take a few minutes to generate knowledge data.**
44 |
45 | ```shell
46 | source .venv/bin/activate
47 | export OPENAI_API_KEY=sk-xxx
48 | export OPENAI_API_BASE_URL=https://api.openai.com/v1/
49 | export MODEL_NAME=gpt-4o-mini
50 | export EMBEDD_MODEL_NAME=text-embedding-ada-002
51 | python camel_database_agent/cli.py \
52 | --database-url sqlite:///database/sqlite/music.sqlite
53 | ```
54 | 
55 |
56 | #### School Scheduling Database
57 |
58 | > This database serves as a comprehensive data model for an educational institution, encompassing various aspects of student, faculty, and course management. It includes modules for building management, staff and faculty details, student information, course offerings, and class scheduling
59 |
60 | Connect to `database/sqlite/school_scheduling.sqlite` database and use `openai` API to answer questions in Chinese.
61 |
62 | ```shell
63 | source .venv/bin/activate
64 | export OPENAI_API_KEY=sk-xxx
65 | export OPENAI_API_BASE_URL=https://api.openai.com/v1/
66 | python camel_database_agent/cli.py \
67 | --database-url sqlite:///database/sqlite/school_scheduling.sqlite \
68 | --language Chinese
69 | ```
70 |
71 | 
72 |
73 | ## Demo Video
74 |
75 | [](https://youtu.be/Fl065DB8Wqo "Watch the CAMEL DatabaseAgent Demo")
76 |
77 | ## Command Line Options
78 |
79 | > usage: cli.py [-h] --database-url DATABASE_URL [--openai-api-key OPENAI_API_KEY] [--openai-api-base-url OPENAI_API_BASE_URL] [--reset-train] [--read-only] [--language LANGUAGE]
80 |
81 | * database-url: The database [URLs](https://docs.sqlalchemy.org/en/20/core/engines.html#database-urls) to connect to.
82 | * openai-api-key: The OpenAI API key.
83 | * openai-api-base-url: The OpenAI API base URL (default is https://api.openai.com/v1/).
84 | * reset-train: Reset the training data.
85 | * read-only: Read-only mode.
86 | * language: Language used to generate training data.
87 |
88 | ## MySQL
89 |
90 | Start a MySQL container with the following command:
91 |
92 | ```shell
93 | docker run -d \
94 | --name camel_mysql \
95 | -e MYSQL_ROOT_PASSWORD=123456 \
96 | -e MYSQL_DATABASE=school_scheduling \
97 | -e MYSQL_USER=camel \
98 | -e MYSQL_PASSWORD=123456 \
99 | -p 3306:3306 \
100 | -v $(pwd)/database/mysql:/docker-entrypoint-initdb.d \
101 | mysql:9
102 | ```
103 |
104 | Connect to the MySQL database to answer questions.
105 |
106 | ```shell
107 | python camel_database_agent/cli.py \
108 | --database-url mysql+pymysql://camel:123456@127.0.0.1:3306/school_scheduling
109 | ```
110 |
111 | ## PostgreSQL
112 |
113 | Start a PostgreSQL container with the following command:
114 |
115 | ```shell
116 | docker run -d \
117 | --name camel_postgresql \
118 | -e POSTGRES_USER=camel \
119 | -e POSTGRES_PASSWORD=123456 \
120 | -e POSTGRES_DB=school_scheduling \
121 | -p 5432:5432 \
122 | -v $(pwd)/database/postgresql:/docker-entrypoint-initdb.d \
123 | postgres:17
124 | ```
125 |
126 | Connect to the PostgreSQL database to answer questions.
127 |
128 | ```shell
129 | python camel_database_agent/cli.py \
130 | --database-url postgresql://camel:123456@localhost:5432/school_scheduling
131 | ```
132 |
133 | ## Developer Integration
134 |
135 | ```python
136 | import logging
137 | import os
138 | import sys
139 | import uuid
140 |
141 | import pandas as pd
142 | from camel.embeddings import OpenAIEmbedding
143 | from camel.models import ModelFactory
144 | from camel.types import ModelPlatformType, ModelType
145 | from colorama import Fore
146 | from tabulate import tabulate
147 |
148 | from camel_database_agent import DatabaseAgent
149 | from camel_database_agent.database.manager import DatabaseManager
150 | from camel_database_agent.database_base import TrainLevel
151 |
152 | # Configure logging settings to show errors on stdout
153 | logging.basicConfig(
154 | level=logging.ERROR,
155 | format="%(message)s",
156 | handlers=[logging.StreamHandler(sys.stdout)],
157 | force=True,
158 | )
159 | # Set specific logging level for the application module
160 | logging.getLogger("camel_database_agent").setLevel(logging.INFO)
161 | logger = logging.getLogger(__name__)
162 |
163 | # Configure pandas display options to show complete data
164 | pd.set_option("display.max_rows", None) # Show all rows
165 | pd.set_option("display.max_columns", None) # Show all columns
166 | pd.set_option("display.width", None) # Auto-detect display width
167 | pd.set_option("display.max_colwidth", None) # Show full content of each cell
168 |
169 | # Define database connection string
170 | database_url = "sqlite:///database/sqlite/music.sqlite"
171 |
172 | # Initialize the database agent with required components
173 | database_agent = DatabaseAgent(
174 | interactive_mode=True,
175 | database_manager=DatabaseManager(db_url=database_url),
176 | # Configure LLM model
177 | model=ModelFactory.create(
178 | model_platform=ModelPlatformType.OPENAI,
179 | model_type=ModelType.GPT_4O_MINI,
180 | api_key=os.getenv("OPENAI_API_KEY"),
181 | url=os.getenv("OPENAI_API_BASE_URL"),
182 | ),
183 | # Configure embedding model
184 | embedding_model=OpenAIEmbedding(
185 | api_key=os.getenv("OPENAI_API_KEY"),
186 | url=os.getenv("OPENAI_API_BASE_URL"),
187 | )
188 | )
189 |
190 | # Train agent's knowledge about the database schema
191 | database_agent.train_knowledge(
192 | # Training level for database knowledge extraction
193 | # MEDIUM level: Balances training time and knowledge depth by:
194 | # - Analyzing schema relationships
195 | # - Extracting representative sample data
196 | # - Generating a moderate number of query examples
197 | level=TrainLevel.MEDIUM,
198 | # Whether to retrain the knowledge base from scratch
199 | # If True: Forces regeneration of all database insights and examples
200 | # If False: Uses existing cached knowledge if available
201 | reset_train=False,
202 | )
203 |
204 | # Display database overview information
205 | print(f"{Fore.GREEN}Database Overview")
206 | print("=" * 50)
207 | print(f"{database_agent.get_summary()}\n\n{Fore.RESET}")
208 |
209 | # Display recommended example questions
210 | print(f"{Fore.GREEN}Recommendation Question")
211 | print("=" * 50)
212 | print(f"{database_agent.get_recommendation_question()}\n\n{Fore.RESET}")
213 |
214 | # Execute a sample query using natural language
215 | response = database_agent.ask(session_id=str(uuid.uuid4()),
216 | question="List all playlists with more than 5 tracks")
217 |
218 | # Handle and display the query results
219 | if response.success:
220 | if response.dataset is not None:
221 | # Format successful results as a table
222 | data = tabulate(
223 | tabular_data=response.dataset, headers='keys', tablefmt='psql'
224 | )
225 | print(f"{Fore.GREEN}{data}{Fore.RESET}")
226 | else:
227 | print(f"{Fore.GREEN}No results found.{Fore.RESET}")
228 | # Display the SQL that was generated
229 | print(f"{Fore.YELLOW}{response.sql}{Fore.RESET}")
230 | else:
231 | # Display error message if query failed
232 | print(f"{Fore.RED}+ {response.error}{Fore.RESET}")
233 | ```
234 |
235 | Output
236 |
237 | ```shell
238 | $ python example.py
239 | Successfully connected to database: sqlite:///database/sqlite/music.sqlite
240 | Workspace: /Users/zhanglei/camel_database_agent_data
241 | Train knowledge Took 0.1063 seconds
242 | Database Overview
243 | ==================================================
244 | This database is designed to support a digital music platform, encompassing key features for artist management, employee administration, customer relations, and sales transactions.
245 |
246 | ### Key Features:
247 |
248 | 1. **Artist and Album Management**:
249 | The `Artist` and `Album` tables form the foundation for managing musical artists and their respective albums. Each artist is uniquely identified and can have multiple albums linked to them, allowing for comprehensive tracking of discographies.
250 |
251 | 2. **Employee and Customer Management**:
252 | The `Employee` table captures detailed information about staff, including their roles, contact details, and reporting structure, which is essential for organizational management. The `Customer` table holds customer profiles, enabling personalized service and facilitating communication through provided contact information.
253 |
254 | 3. **Media Type and Genre Classification**:
255 | The `MediaType` and `Genre` tables classify music tracks, enabling easy filtering and searching for users based on their preferences for specific genres or media types (e.g. digital downloads, CDs). This classification enhances user experience by making music discovery intuitive and engaging.
256 |
257 | 4. **Track and Playlist Management**:
258 | The `Track` table contains detailed attributes for individual music tracks, including duration and pricing. The `Playlist` table allows users to create and manage custom playlists, which can enrich user engagement and retention by providing a personalized listening experience.
259 |
260 | 5. **Sales Tracking and Invoicing**:
261 | The `Invoice` and `InvoiceLine` tables keep track of sales transactions, linking customers with the purchases they make. This structure not only supports effective billing through clear associations between invoices and the tracks purchased but also facilitates revenue tracking and financial reporting. The ability to view total sales and detailed line items allows for comprehensive sales analysis.
262 |
263 | 6. **Flexible Design for Data Relationships**:
264 | Through the use of foreign keys and relationships, such as the linkage between customers and their respective invoices, the database provides a robust structure for maintaining data integrity. The design ensures that all relevant information is easily accessible, promoting efficient database utilization.
265 |
266 | Overall, this database structure provides a complete solution for managing a music platform, supporting critical business functions like customer engagement, sales tracking, and music cataloging. It enables organizations to operate efficiently, ensuring a seamless experience for both customers and internal staff.
267 |
268 |
269 | Recommendation Question
270 | ==================================================
271 | List all playlists with more than 5 tracks.
272 | What are the sales figures for each month in 2009?
273 | Show each artist and the number of albums they've released.
274 | What is the total revenue generated from invoices for each customer?
275 | Which tracks belong to the album 'Ball to the Wall'?
276 |
277 |
278 | Question to SQL Took 2.8951 seconds
279 | Execute Query SQL Took 0.1036 seconds
280 | +----+--------------+--------------+
281 | | | PlaylistId | TrackCount |
282 | |----+--------------+--------------|
283 | | 0 | 1 | 3290 |
284 | | 1 | 3 | 213 |
285 | | 2 | 5 | 1477 |
286 | | 3 | 8 | 3290 |
287 | | 4 | 10 | 213 |
288 | | 5 | 11 | 39 |
289 | | 6 | 12 | 75 |
290 | | 7 | 13 | 25 |
291 | | 8 | 14 | 25 |
292 | | 9 | 15 | 25 |
293 | | 10 | 16 | 15 |
294 | | 11 | 17 | 26 |
295 | +----+--------------+--------------+
296 | SELECT PlaylistId, COUNT(TrackId) as TrackCount FROM PlaylistTrack GROUP BY PlaylistId HAVING TrackCount > 5;
297 | ```
298 |
299 | ## Spider 2.0-Lite(Planned)
300 |
301 | [Spider 2.0-Lite](https://github.com/xlang-ai/Spider2/tree/main/spider2-lite) is a text-to-SQL evaluation framework that includes 547 real enterprise-level database use cases, involving various database systems such as BigQuery, Snowflake, and SQLite, to assess the ability of language models in converting text to SQL in complex enterprise environments.
302 |
303 | > This use case attempts to query the SQLite database based on user questions
304 | > and evaluate whether the SQL executes smoothly (**without assessing data accuracy**).
305 |
306 | * spider2_lite/database/local_sqlite - SQLite database file. [Manual download required](spider2_lite/database/README.md).
307 | * spider2_lite/spider2-lite.jsonl - Question and SQL pairs. [Link](https://github.com/xlang-ai/Spider2/blob/main/spider2-lite/spider2-lite.jsonl)
308 | * spider2_lite/spider2_run.py - Run the Spider 2.0-Lite evaluation.
309 |
310 | Run the Spider 2.0-Lite evaluation.
311 |
312 | ```shell
313 | cd spider2_lite
314 | export OPENAI_API_KEY=sk-xxx
315 | export OPENAI_API_BASE_URL=https://api.openai.com/v1/
316 | export MODEL_NAME=gpt-4o-mini
317 | python spider2_run.py
318 | ```
--------------------------------------------------------------------------------
/add_license_headers.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Prepend LICENSE_HEADER.tpl to every Python source file under
# camel_database_agent that does not already contain a copyright notice.

# IFS= keeps leading/trailing whitespace in file names intact; -r keeps
# backslashes literal.
find camel_database_agent -type f -name "*.py" | while IFS= read -r file
do
    # Skip files that already carry a license header.
    if ! grep -q Copyright "$file"
    then
        # Write header + blank line + original content to a temp file,
        # then move it into place so a failure never truncates the source.
        { cat LICENSE_HEADER.tpl; echo; cat "$file"; } >"$file.new" && mv "$file.new" "$file"
    fi
done
--------------------------------------------------------------------------------
/camel_database_agent/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Lei Zhang
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
"""Public package API for ``camel_database_agent``.

Re-exports the database agent, the SQL query-inference pipeline, and the
schema dialect implementations for MySQL, PostgreSQL and SQLite.
"""

__all__ = [
    "DataQueryInferencePipeline",
    "DatabaseAgent",
    "DatabaseSchemaDialectMySQL",
    "DatabaseSchemaDialectPostgresql",
    "DatabaseSchemaDialectSqlite",
]

# Importing the dialect modules also registers each dialect with
# DatabaseSchemaDialect via the module-level register() call in each file.
from camel_database_agent.database.dialect.dialect_mysql import (
    DatabaseSchemaDialectMySQL,
)
from camel_database_agent.database.dialect.dialect_postgresql import (
    DatabaseSchemaDialectPostgresql,
)
from camel_database_agent.database.dialect.dialect_sqlite import (
    DatabaseSchemaDialectSqlite,
)
from camel_database_agent.database_agent import DatabaseAgent
from camel_database_agent.datagen.pipeline import (
    DataQueryInferencePipeline,
)
--------------------------------------------------------------------------------
/camel_database_agent/cli.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Lei Zhang
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | """
16 | 设置 pandas 显示选项
17 | """
18 |
19 | import argparse
20 | import hashlib
21 | import logging
22 | import os
23 | import sys
24 | import uuid
25 | from threading import Event, Thread
26 | from urllib.parse import urlparse
27 |
28 | import pandas as pd
29 | from camel.embeddings import OpenAICompatibleEmbedding
30 | from camel.models import ModelFactory
31 | from camel.types import ModelPlatformType
32 | from colorama import Fore
33 | from tabulate import tabulate
34 |
35 | from camel_database_agent import DatabaseAgent
36 | from camel_database_agent.database.manager import DatabaseManager
37 | from camel_database_agent.database_agent import DatabaseAgentResponse
38 | from camel_database_agent.database_base import TrainLevel, spinner
39 |
40 | """Logging"""
41 | logging.basicConfig(
42 | level=logging.ERROR,
43 | format="%(message)s",
44 | handlers=[logging.StreamHandler(sys.stdout)],
45 | force=True,
46 | )
47 | logging.getLogger("camel_database_agent").setLevel(logging.INFO)
48 | logger = logging.getLogger(__name__)
49 |
50 | """Pandas display"""
51 | pd.set_option("display.max_rows", None) # Show all rows
52 | pd.set_option("display.max_columns", None) # Show all columns
53 | pd.set_option("display.width", None) # Auto-detect display width
54 | pd.set_option("display.max_colwidth", None) # Show full content of each cell
55 |
56 |
def generate_db_id(db_url: str, language: str) -> str:
    """Generate a short, stable ID for a (database URL, language) pair.

    The ID names the per-database data directory, so the same database
    queried in the same language always maps to the same cache folder.

    Args:
        db_url: SQLAlchemy database connection string.
        language: Natural language used for questions; part of the key so
            knowledge trained in different languages is cached separately.

    Returns:
        A 12-character hex string derived from an MD5 hash of the URL's
        dialect, network location and path, plus ``language``.
    """
    parsed_url = urlparse(db_url)
    # Only the parts that identify the database matter; query parameters
    # are deliberately excluded from the key.
    db_identifier = f"{parsed_url.scheme}:{parsed_url.netloc}{parsed_url.path}:{language}"
    # MD5 is acceptable here: this is a cache key, not a security boundary.
    return hashlib.md5(db_identifier.encode()).hexdigest()[:12]
85 |
86 |
def main() -> None:
    """Entry point for the interactive database-agent CLI.

    Parses command-line options, builds the chat and embedding model
    backends, trains the agent's knowledge on the target database, prints
    a summary plus recommended questions, and then runs a question/answer
    REPL until the user types 'exit' or 'quit'.
    """
    parser = argparse.ArgumentParser(description="Query the database using natural language.")
    parser.add_argument(
        "--database-url",
        "-d",
        required=True,
        help="Database URL (e.g., sqlite:///db.sqlite)",
    )
    parser.add_argument(
        "--openai-api-key",
        "-key",
        required=False,
        default=os.environ.get("OPENAI_API_KEY"),
        help="OpenAI KEY",
    )
    parser.add_argument(
        "--openai-api-base-url",
        "-url",
        required=False,
        default=os.environ.get("OPENAI_API_BASE_URL", "https://api.openai.com/v1"),
        help="OPENAI API",
    )
    # NOTE(review): short flags look swapped (-em for model, -m for the
    # embedding model) but are kept for backward compatibility — confirm.
    parser.add_argument(
        "--model-name",
        "-em",
        required=False,
        default=os.environ.get("MODEL_NAME", "gpt-4o-mini"),
        help="Model name, such as gpt-3.5-turbo or gpt-4o-mini",
    )
    parser.add_argument(
        "--embedd-model-name",
        "-m",
        required=False,
        default=os.environ.get("EMBED_MODEL_NAME", "text-embedding-ada-002"),
        help="Embedding model name, such as text-embedding-ada-002",
    )
    parser.add_argument("--reset-train", "-rt", action="store_true", help="Retraining knowledge")
    # NOTE(review): store_true combined with default=True means this flag is
    # always True and cannot be disabled from the command line. It only
    # affects the banner below; DatabaseManager enforces its own default.
    parser.add_argument(
        "--read-only", "-ro", action="store_true", default=True, help="SQL Read-Only Mode"
    )
    parser.add_argument(
        "--language",
        "-lang",
        required=False,
        default="English",
        help="The language you used to ask the question, such as English or Chinese.",
    )
    parser.add_argument(
        "--timeout",
        required=False,
        type=int,  # without type=int a user-supplied value arrived as str
        default=1800,
        help="The timeout value in seconds for API calls.",
    )
    args = parser.parse_args()

    # Create a data directory for the database agent, keyed on the
    # database URL + language so separate databases never share caches.
    user_home = os.path.expanduser("~")
    data_path = os.path.join(
        user_home, "camel_database_agent_data", generate_db_id(args.database_url, args.language)
    )

    # Create a database manager and database agent
    database_manager = DatabaseManager(db_url=args.database_url)
    database_agent = DatabaseAgent(
        interactive_mode=True,
        database_manager=database_manager,
        model=ModelFactory.create(
            model_platform=ModelPlatformType.OPENAI_COMPATIBLE_MODEL,
            model_type=args.model_name,
            api_key=args.openai_api_key,
            url=args.openai_api_base_url,
            timeout=args.timeout,
        ),
        embedding_model=OpenAICompatibleEmbedding(
            api_key=args.openai_api_key,
            url=args.openai_api_base_url,
            model_type=args.embedd_model_name,
        ),
        language=args.language,
        data_path=data_path,
    )
    token_usage = database_agent.train_knowledge(
        level=TrainLevel.MEDIUM,
        reset_train=args.reset_train,
    )

    # Print the banner: database overview, recommended questions, usage.
    print(f"{Fore.GREEN}")
    print("=" * 50)
    print(f"{Fore.GREEN}Database Overview")
    print("=" * 50)
    print(f"{database_agent.get_summary()}")
    print("=" * 50)
    print(f"{Fore.LIGHTYELLOW_EX}Recommendation Question")
    print("=" * 50)
    print(f"{database_agent.get_recommendation_question()}{Fore.RESET}")
    print(f"{Fore.CYAN}=" * 50)
    if args.read_only:
        print(f"Interactive Database Agent Query({Fore.GREEN}Read-Only Mode{Fore.RESET})")
    else:
        print(f"Interactive Database Agent Query({Fore.LIGHTRED_EX}Read-Write Mode{Fore.RESET})")
    print(f"{Fore.CYAN}Type {Fore.RED}'exit' or 'quit'{Fore.RESET} to end the session")
    print(
        f"{Fore.CYAN}Type {Fore.LIGHTYELLOW_EX}'help'{Fore.RESET} "
        f"to get more recommended questions"
    )
    print(f"{Fore.CYAN}Training completed, using {token_usage.total_tokens} tokens{Fore.RESET}")
    print(f"{Fore.CYAN}=" * 50)

    # One conversation session for the whole REPL, so the agent keeps context.
    session_id = str(uuid.uuid4())

    while True:
        user_question = input(f"{Fore.CYAN}Enter your question: {Fore.RESET}")
        user_question = user_question.strip()
        if user_question.lower() in ["exit", "quit"]:
            print(f"{Fore.YELLOW}Exiting interactive mode. Goodbye!{Fore.RESET}")
            break
        if user_question.lower() == "help":
            print(f"{Fore.GREEN}Database Overview")
            print("=" * 50)
            print(f"{database_agent.get_summary()}")
            print(f"{Fore.LIGHTYELLOW_EX}Recommendation Question")
            print("=" * 50)
            print(f"{database_agent.get_recommendation_question()}{Fore.RESET}")
        elif len(user_question) > 0:
            stop_spinner = Event()
            spinner_thread = Thread(target=spinner, args=(stop_spinner, "Thinking..."))
            spinner_thread.daemon = True
            try:
                # Set up and start the spinner in a separate thread
                spinner_thread.start()

                # Ask the database agent
                response: DatabaseAgentResponse = database_agent.ask(
                    session_id=session_id,
                    question=user_question,
                )

                # Stop the spinner (it will clear the line)
                stop_spinner.set()
                spinner_thread.join()

                if response.success:
                    if response.dataset is not None:
                        data = tabulate(
                            tabular_data=response.dataset, headers='keys', tablefmt='psql'
                        )
                        print(f"{Fore.GREEN}{data}{Fore.RESET}")
                    else:
                        print(f"{Fore.GREEN}No results found.{Fore.RESET}")
                    print(f"{Fore.YELLOW}{response.sql}{Fore.RESET}")
                else:
                    print(f"{Fore.RED}+ {response.error}{Fore.RESET}")
                if response.usage:
                    print(
                        f"{Fore.YELLOW}Tokens used: {response.usage['total_tokens']}{Fore.RESET}"
                    )
            except Exception as e:
                # Make sure to stop the spinner on exception. (stop_spinner is
                # always bound here, so no locals() check is needed.)
                if not stop_spinner.is_set():
                    stop_spinner.set()
                    spinner_thread.join()
                print(f"{Fore.RED}ERROR: {e}{Fore.RESET}")


if __name__ == "__main__":
    main()
253 |
--------------------------------------------------------------------------------
/camel_database_agent/core/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Lei Zhang
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
--------------------------------------------------------------------------------
/camel_database_agent/core/exceptions.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Lei Zhang
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
class DataGenerationError(Exception):
    """Base exception for failures during the data generation process."""
20 |
21 |
class QueryParsingError(DataGenerationError):
    """Error raised while parsing a query."""
26 |
27 |
class KnowledgeException(Exception):
    """Unified exception for database-knowledge errors."""
32 |
--------------------------------------------------------------------------------
/camel_database_agent/core/method_lru_cache.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Lei Zhang
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
from functools import lru_cache, wraps
from typing import Any, Callable, TypeVar, cast
17 |
R = TypeVar('R')


def method_lru_cache(maxsize: int = 128) -> Callable[[Callable[..., R]], Callable[..., R]]:
    """A decorator that applies lru_cache to a method safely.

    Args:
        maxsize: Maximum number of cached entries (as in ``lru_cache``).

    Note:
        Cache keys include ``self``, so cached entries keep their instances
        alive for the lifetime of the cache (the usual lru_cache-on-a-method
        caveat); ``maxsize`` bounds that growth.
    """

    def decorator(func: Callable[..., R]) -> Callable[..., R]:
        cache = lru_cache(maxsize=maxsize)(func)

        # wraps() preserves __name__/__doc__ of the decorated method,
        # which the previous wrapper lost.
        @wraps(func)
        def wrapper(self: Any, *args: Any, **kwargs: Any) -> R:
            return cast(R, cache(self, *args, **kwargs))

        # Expose the underlying cache controls on the wrapper.
        wrapper.cache_clear = cache.cache_clear  # type: ignore[attr-defined]
        wrapper.cache_info = cache.cache_info  # type: ignore[attr-defined]
        return wrapper

    return decorator
35 |
--------------------------------------------------------------------------------
/camel_database_agent/database/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Lei Zhang
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
--------------------------------------------------------------------------------
/camel_database_agent/database/dialect/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Lei Zhang
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
--------------------------------------------------------------------------------
/camel_database_agent/database/dialect/dialect.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Lei Zhang
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import abc
16 | import logging
17 | from typing import ClassVar, List, Optional, Type, TypeVar, Union
18 |
19 | from camel.agents import ChatAgent
20 | from camel.models import BaseModelBackend
21 | from tabulate import tabulate
22 |
23 | from camel_database_agent.database.manager import DatabaseManager
24 | from camel_database_agent.database.prompts import PromptTemplates
25 |
26 | logger = logging.getLogger(__name__)
27 |
28 | T = TypeVar("T", bound="DatabaseSchemaDialect")
29 |
30 |
class DatabaseSchemaDialect(abc.ABC):
    """Base class for database-dialect-specific schema extraction.

    Subclasses register themselves via :meth:`register` so that
    :meth:`get_dialect` can look them up by SQLAlchemy dialect name.
    """

    # SQLAlchemy dialect name handled by the subclass (e.g. "mysql").
    dialect_name: str
    # Registry mapping dialect names to their implementing classes.
    dialect_map: ClassVar[dict[str, Type["DatabaseSchemaDialect"]]] = {}
    # Defaults to None so get_polished_schema() does not raise
    # AttributeError when the constructor was called without a model
    # (previously this was a bare annotation with no value).
    schema_polish_agent: Optional[ChatAgent] = None
    # DDL text built by the subclass constructor.
    schema: str

    def __init__(
        self,
        database_manager: DatabaseManager,
        model: Optional[Union[BaseModelBackend, List[BaseModelBackend]]] = None,
    ):
        self.database_manager = database_manager
        if model:
            self.schema_polish_agent = ChatAgent(
                system_message="You are a database expert, proficient in the "
                "SQL syntax of various databases.",
                model=model,
            )

    @classmethod
    def register(cls, dialect_type: Type[T]) -> Type[T]:
        """Register a dialect subclass under its ``dialect_name``.

        Raises:
            TypeError: If ``dialect_type`` is not a DatabaseSchemaDialect
                subclass.
        """
        if not issubclass(dialect_type, DatabaseSchemaDialect):
            raise TypeError(f"Expected subclass of DatabaseSchemaDialect, got {dialect_type}")
        cls.dialect_map[dialect_type.dialect_name] = dialect_type
        return dialect_type

    @classmethod
    def get_dialect(
        cls,
        dialect_name: str,
        database_manager: DatabaseManager,
        model: Optional[Union[BaseModelBackend, List[BaseModelBackend]]] = None,
    ) -> "DatabaseSchemaDialect":
        """Instantiate the registered dialect for ``dialect_name``.

        Raises:
            KeyError: If no dialect was registered under that name.
        """
        dialect_type: Type["DatabaseSchemaDialect"] = cls.dialect_map[dialect_name]
        return dialect_type(database_manager=database_manager, model=model)

    def get_polished_schema(self, language: str = "English") -> str:
        """Return the schema, polished by the LLM agent when one is configured.

        Falls back to the raw schema when no model was provided at
        construction time.
        """
        if self.schema_polish_agent:
            prompt = PromptTemplates.POLISH_SCHEMA_OUTPUT_EXAMPLE.replace(
                "{{ddl_sql}}", self.get_schema()
            ).replace("{{language}}", language)
            response = self.schema_polish_agent.step(prompt)
            return response.msgs[0].content
        return self.get_schema()

    @abc.abstractmethod
    def get_schema(self) -> str:
        """Return the database schema as DDL text.

        Must be implemented by all dialect subclasses.
        """

    @abc.abstractmethod
    def get_table_names(self) -> List[str]:
        """Return the table names in the database.

        Must be implemented by all dialect subclasses.
        """

    def get_sampled_data(self, data_samples_size: int = 5) -> str:
        """Return up to ``data_samples_size`` rows from every table.

        Each table is rendered as a markdown section containing a
        psql-style table. Tables that fail to sample are skipped with a
        warning rather than aborting the whole dump.
        """
        metadata = self.database_manager.get_metadata()
        sample_data = []

        for table_name in metadata.tables:
            sample_query = f"SELECT * FROM {table_name} LIMIT {data_samples_size}"
            try:
                rows = self.database_manager.select(sample_query)
                dataset = tabulate(tabular_data=rows, headers='keys', tablefmt='psql')
                sample_data.append(f"## {table_name}\n\n{dataset}")
            except Exception as e:
                logger.warning(f"Error sampling data from table {table_name}: {e}")

        return "\n\n".join(sample_data)
136 |
--------------------------------------------------------------------------------
/camel_database_agent/database/dialect/dialect_mysql.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Lei Zhang
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from typing import ClassVar, List, Optional, Union
16 |
17 | from camel.models import BaseModelBackend
18 |
19 | from camel_database_agent.database.dialect.dialect import (
20 | DatabaseSchemaDialect,
21 | )
22 | from camel_database_agent.database.manager import (
23 | DatabaseManager,
24 | )
25 |
26 |
class DatabaseSchemaDialectMySQL(DatabaseSchemaDialect):
    """MySQL schema dialect: collects DDL via ``SHOW CREATE TABLE``."""

    dialect_name = "mysql"

    def __init__(
        self,
        database_manager: DatabaseManager,
        model: Optional[Union[BaseModelBackend, List[BaseModelBackend]]] = None,
    ):
        super().__init__(database_manager=database_manager, model=model)
        # Instance attribute: the previous ClassVar list was shared by all
        # instances, so re-instantiation accumulated duplicate table names.
        self.table_names: List[str] = []
        ddl_statements = []
        for table in self.database_manager.get_metadata().sorted_tables:
            self.table_names.append(table.name)
            result = database_manager.select(f"SHOW CREATE TABLE {table.name}")
            if result:
                create_table = result[0]["Create Table"]
                ddl_statements.append(create_table + ";")
        self.schema = "\n".join(ddl_statements)

    def get_schema(self) -> str:
        """Return the concatenated CREATE TABLE statements."""
        return self.schema

    def get_table_names(self) -> List[str]:
        """Return the names of all reflected tables."""
        return self.table_names


DatabaseSchemaDialect.register(DatabaseSchemaDialectMySQL)
54 |
--------------------------------------------------------------------------------
/camel_database_agent/database/dialect/dialect_postgresql.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Lei Zhang
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from typing import ClassVar, List, Optional, Union
16 |
17 | from camel.models import BaseModelBackend
18 |
19 | from camel_database_agent.database.dialect.dialect import (
20 | DatabaseSchemaDialect,
21 | )
22 | from camel_database_agent.database.manager import DatabaseManager
23 |
24 |
class DatabaseSchemaDialectPostgresql(DatabaseSchemaDialect):
    """PostgreSQL schema dialect.

    Rebuilds CREATE TABLE DDL from reflected metadata and appends
    COMMENT ON statements for table and column comments.
    """

    dialect_name = "postgresql"

    def __init__(
        self,
        database_manager: DatabaseManager,
        model: Optional[Union[BaseModelBackend, List[BaseModelBackend]]] = None,
    ):
        super().__init__(database_manager=database_manager, model=model)
        # Instance attribute: the previous ClassVar list was shared by all
        # instances, so re-instantiation accumulated duplicate table names.
        self.table_names: List[str] = []
        ddl_statements = []
        for table in self.database_manager.get_metadata().sorted_tables:
            self.table_names.append(table.name)
            create_stmt = [f"CREATE TABLE {table.name} ("]
            columns = []
            for column in table.columns:
                col_def = f"    {column.name} {column.type}"
                if not column.nullable:
                    col_def += " NOT NULL"
                if column.primary_key:
                    col_def += " PRIMARY KEY"
                if column.server_default:
                    if hasattr(column.server_default, "arg"):
                        col_def += f" DEFAULT {column.server_default.arg}"
                    else:
                        col_def += f" DEFAULT {column.server_default}"
                columns.append(col_def)
            create_stmt.append(",\n".join(columns))
            create_stmt.append(");")

            # Fetch the table comment (None when the table has no comment,
            # and guard against an empty result set).
            result = self.database_manager.select(
                f"SELECT obj_description('{table.name}'::regclass, 'pg_class')"
            )
            table_comment = result[0]['obj_description'] if result else None
            if table_comment:
                create_stmt.append(f"COMMENT ON TABLE {table.name} IS '{table_comment}';")

            # Fetch column comments.
            for column in table.columns:
                result = self.database_manager.select(
                    f"SELECT col_description('{table.name}'::regclass, "
                    f"(SELECT ordinal_position FROM information_schema.columns "
                    f"WHERE table_name = '{table.name}' AND column_name = '{column.name}'))"
                )
                col_comment = result[0]['col_description'] if result else None
                if col_comment:
                    create_stmt.append(
                        f"COMMENT ON COLUMN {table.name}.{column.name} IS '{col_comment}';"
                    )

            ddl_statements.append("\n".join(create_stmt))
        self.schema = "\n".join(ddl_statements)

    def get_schema(self) -> str:
        """Return the reconstructed DDL, including COMMENT ON statements."""
        return self.schema

    def get_table_names(self) -> List[str]:
        """Return the names of all reflected tables."""
        return self.table_names


DatabaseSchemaDialect.register(DatabaseSchemaDialectPostgresql)
87 |
--------------------------------------------------------------------------------
/camel_database_agent/database/dialect/dialect_sqlite.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Lei Zhang
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from typing import ClassVar, List, Optional, Union
16 |
17 | from camel.models import BaseModelBackend
18 | from sqlalchemy import DefaultClause
19 |
20 | from camel_database_agent.database.dialect.dialect import (
21 | DatabaseSchemaDialect,
22 | )
23 | from camel_database_agent.database.manager import DatabaseManager
24 |
25 |
class DatabaseSchemaDialectSqlite(DatabaseSchemaDialect):
    """SQLite schema dialect.

    SQLite doesn't support comments in standard DDL, so we use the best
    approximation: plain CREATE TABLE statements rebuilt from metadata.
    """

    dialect_name = "sqlite"

    def __init__(
        self,
        database_manager: DatabaseManager,
        model: Optional[Union[BaseModelBackend, List[BaseModelBackend]]] = None,
    ):
        super().__init__(database_manager=database_manager, model=model)
        # Instance attribute: the previous ClassVar list was shared by all
        # instances, so re-instantiation accumulated duplicate table names.
        self.table_names: List[str] = []
        ddl_statements = []
        for table in self.database_manager.get_metadata().sorted_tables:
            self.table_names.append(table.name)
            create_stmt = f"CREATE TABLE {table.name} (\n"
            columns = []
            for column in table.columns:
                col_def = f"    {column.name} {column.type}"
                if not column.nullable:
                    col_def += " NOT NULL"
                if column.primary_key:
                    col_def += " PRIMARY KEY"
                if isinstance(column.server_default, DefaultClause):
                    col_def += f" DEFAULT {column.server_default.arg}"
                columns.append(col_def)
            create_stmt += ",\n".join(columns)
            create_stmt += "\n);"
            ddl_statements.append(create_stmt)
        self.schema = "\n".join(ddl_statements)

    def get_schema(self) -> str:
        """Return the reconstructed CREATE TABLE statements."""
        return self.schema

    def get_table_names(self) -> List[str]:
        """Return the names of all reflected tables."""
        return self.table_names


DatabaseSchemaDialect.register(DatabaseSchemaDialectSqlite)
67 |
--------------------------------------------------------------------------------
/camel_database_agent/database/manager.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Lei Zhang
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import functools
16 | import logging
17 | from contextlib import contextmanager
18 | from typing import Any, Callable, Iterator, List, TypeVar, Union
19 |
20 | import pandas as pd
21 | from sqlalchemy import MetaData, Result, TextClause, create_engine, text
22 | from sqlalchemy.exc import OperationalError
23 | from sqlalchemy.orm import Session, sessionmaker
24 |
25 | from camel_database_agent.database_base import SQLExecutionError, timing
26 |
27 | T = TypeVar("T")
28 |
29 | logger = logging.getLogger(__name__)
30 |
31 | read_only_message = (
32 | "Operation rejected: This SQL contains statements that "
33 | "could modify data or schema (DROP, DELETE, UPDATE, etc.)"
34 | " which is not allowed in read-only mode."
35 | )
36 |
37 |
@contextmanager
def session_scope(session_maker: sessionmaker) -> Iterator[Session]:
    """Yield a session, committing on success and rolling back on error.

    The session is always closed, whether or not the block raised.
    """
    db_session = session_maker()
    try:
        yield db_session
        # Commit inside the try block so a failing commit also rolls back.
        db_session.commit()
    except Exception:
        db_session.rollback()
        raise
    finally:
        db_session.close()
50 |
51 |
def with_session(func: Callable) -> Callable:
    """Decorator that supplies a managed session as the first argument.

    The wrapped method is invoked as ``func(self, session, *args, **kwargs)``
    inside a :func:`session_scope` block, so commit/rollback/close are
    handled automatically.
    """

    @functools.wraps(func)
    def managed(self: Any, *args: Any, **kwargs: Any) -> Any:
        with session_scope(self.Session) as active_session:
            return func(self, active_session, *args, **kwargs)

    return managed
61 |
62 |
63 | class DatabaseManager:
    def __init__(self, db_url: str, read_only_model: bool = True):
        """Connect to the database at ``db_url``.

        Args:
            db_url: SQLAlchemy connection string.
            read_only_model: When True, ``_check_sql`` rejects SQL containing
                data/schema-modifying keywords (DROP, DELETE, UPDATE, ...).
        """
        self.db_url = db_url
        self.read_only_model = read_only_model
        self.engine = create_engine(db_url)
        self.Session = sessionmaker(bind=self.engine)
        self.metadata = MetaData()
        # Open (and immediately close) a connection to fail fast when the
        # database is unreachable.
        with self.engine.connect():
            logger.info(f"Successfully connected to database: {db_url}")
72 |
73 | @timing
74 | @with_session
75 | def select(
76 | self, session: Session, sql: str, bind_pd: bool = False
77 | ) -> Union[List[dict], pd.DataFrame]:
78 | """Execute Query SQL"""
79 | self._check_sql(sql)
80 | try:
81 | result: Result = session.execute(text(sql))
82 | if bind_pd:
83 | return pd.DataFrame(result.fetchall(), columns=list(result.keys()))
84 | else:
85 | # 转换结果为列表字典格式
86 | column_names = result.keys()
87 | rows = [dict(zip(column_names, row)) for row in result]
88 | return rows
89 | except OperationalError as e:
90 | raise SQLExecutionError(sql, str(e))
91 |
92 | @with_session
93 | def execute(
94 | self, session: Session, sql: Union[str, List[str]], ignore_sql_check: bool = False
95 | ) -> bool:
96 | """Execute one or more UPDATE/INSERT/DELETE statements."""
97 | if not ignore_sql_check:
98 | self._check_sql(sql)
99 | if isinstance(sql, str):
100 | for statement in sql.split(";"):
101 | if statement.strip():
102 | session.execute(text(statement))
103 | else:
104 | for statement in sql:
105 | if statement.strip():
106 | session.execute(text(statement.strip()))
107 | return True
108 |
109 | def dialect_name(self) -> str:
110 | return self.engine.dialect.name
111 |
112 | def get_metadata(self) -> MetaData:
113 | self.metadata.reflect(bind=self.engine)
114 | return self.metadata
115 |
116 | def _check_sql(self, sql: Union[str, List[str]]) -> None:
117 | """Check if SQL is safe to execute (non-destructive)."""
118 | if self.read_only_model:
119 | dangerous_keywords = {
120 | # Standalone keywords that modify data/schema
121 | "DROP": True,
122 | "TRUNCATE": True,
123 | "DELETE": True,
124 | "UPDATE": True,
125 | "INSERT": True,
126 | "ALTER": True,
127 | "RENAME": True,
128 | "REPLACE": True,
129 | # CREATE is special case - some forms are read-only
130 | "CREATE": {"SAFE_PREFIXES": ["SHOW CREATE"]},
131 | }
132 |
133 | statements = []
134 | if isinstance(sql, str):
135 | statements = [stmt.strip().upper() for stmt in sql.split(";") if stmt.strip()]
136 | elif isinstance(sql, TextClause):
137 | statements = [stmt.strip().upper() for stmt in sql.text if stmt.strip()]
138 | else:
139 | statements = [stmt.strip().upper() for stmt in sql if stmt.strip()]
140 |
141 | # Check each statement for dangerous keywords
142 | for stmt in statements:
143 | stmt_upper = stmt.upper()
144 | for keyword, config in dangerous_keywords.items():
145 | if isinstance(config, bool) and config:
146 | if keyword in stmt_upper.split():
147 | raise SQLExecutionError('\n'.join(statements), read_only_message)
148 | elif isinstance(config, dict):
149 | # Handle special cases with exceptions
150 | if keyword in stmt_upper.split():
151 | is_safe = False
152 | for safe_prefix in config.get("SAFE_PREFIXES", []):
153 | if stmt_upper.startswith(safe_prefix):
154 | is_safe = True
155 | break
156 | if not is_safe:
157 | raise SQLExecutionError('\n'.join(statements), read_only_message)
158 |
--------------------------------------------------------------------------------
/camel_database_agent/database/prompts.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Lei Zhang
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import textwrap
16 |
17 |
class PromptTemplates:
    """Prompt-template constants for schema polishing and sample-data parsing."""

    # Asks the LLM to add {{language}} comments to a DDL script without
    # modifying the SQL itself.  Placeholders: {{language}}, {{ddl_sql}}.
    # NOTE(review): the "Output Example" fence is labelled ```json but the
    # content is SQL — confirm whether the label should be ```sql.
    POLISH_SCHEMA_OUTPUT_EXAMPLE = textwrap.dedent("""
Please add detailed {{language}} comments to the following DDL script, explaining the business meaning and design intent of each table and field.

Requirements:
- Keep the original DDL script completely unchanged
- Add comments before the script
- Comments should be professional and concise
- Use SQL -- comment syntax

DDL Script:
```sql
{{ddl_sql}}
```

Output Example:
```json
-- User Management Table stores basic information and authentication credentials for system users. Applicable scenarios include user registration, login, and permission management.
CREATE TABLE users (
    id INT AUTO_INCREMENT PRIMARY KEY,  -- Unique user identifier, auto-increment ID
    username VARCHAR(50) NOT NULL UNIQUE,  -- User login account, 50 character length, ensures uniqueness
    email VARCHAR(100) NOT NULL UNIQUE,  -- User email, used for notifications and password recovery, 100 character length
    password VARCHAR(255) NOT NULL,  -- User password stored with encryption, recommended to use hash algorithm
    full_name VARCHAR(100),  -- User full name, optional field
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,  -- User account creation timestamp, defaults to current time
    last_login TIMESTAMP NULL,  -- Most recent login time, can be initially null
    is_active BOOLEAN DEFAULT TRUE  -- Account status flag, default is active
);
```

Key Strategies:
- Clearly instruct not to modify the original DDL
- Provide specific guidance for adding comments
- Specify the expected format and content of comments
- Emphasize professionalism and conciseness
""")

    # Instructs the model to convert one markdown section of sampled table
    # data into strict JSON: {"items": [{id, summary, dataset}]}.
    # Placeholder: {{section}}.
    # NOTE(review): the "Required Output Format" example contains a stray
    # line break inside the "id" value — looks like an extraction artifact;
    # confirm against the repository file before changing.
    PARSE_SAMPLED_RECORD = textwrap.dedent("""
# JSON Format Request
You are a specialized JSON generator. Your only function is to parse the provided data and convert it to JSON format, strictly following the format requirements.

## Input Data:
{{section}}

## Instructions:
1. Create a JSON array with each table as an object
2. Each object must have exactly three fields:
   - "id": the table name
   - "summary": a brief description of the table
   - "dataset": the data in markdown format
3. The entire response must be ONLY valid JSON without any additional text, explanation, or markdown code blocks

## Required Output Format:
{
    "items":[{
        "id": "
",
        "summary": "",
        "dataset": ""
    }]
}

## IMPORTANT:
- Your response must contain ONLY the JSON object, nothing else
- Do not include explanations, introductions, or conclusions
- Do not use markdown code blocks (```) around the JSON
- Do not include phrases like "Here's the JSON" or "I've created the JSON"
- Do not indicate that you are providing the output in any way""")
85 |
--------------------------------------------------------------------------------
/camel_database_agent/database/schema.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Lei Zhang
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import logging
16 | import re
17 | import textwrap
18 | from typing import Generic, List, Optional, TypeVar, Union
19 |
20 | from camel.agents import ChatAgent
21 | from camel.models import BaseModelBackend
22 | from pydantic import BaseModel
23 |
24 | from camel_database_agent.database.manager import DatabaseManager
25 | from camel_database_agent.database.prompts import PromptTemplates
26 | from camel_database_agent.database_base import timing
27 |
28 | logger = logging.getLogger(__name__)
29 |
30 |
class DDLRecord(BaseModel):
    """One table's CREATE statement with an LLM-generated summary."""

    id: str  # table name
    summary: str  # short description of the table
    sql: str  # the table-creation SQL statement
35 |
36 |
class DMLRecord(BaseModel):
    """Sampled data for one table, rendered as markdown."""

    id: str  # table name
    summary: str  # short description of the table's data
    dataset: str  # sampled rows in markdown format
41 |
42 |
class QueryRecord(BaseModel):
    """A natural-language question paired with its SQL answer."""

    id: str  # unique identifier for the pair
    question: str  # user-facing question text
    sql: str  # SQL statement answering the question
47 |
48 |
49 | T = TypeVar('T', bound=BaseModel)
50 |
51 |
class SchemaParseResponse(BaseModel, Generic[T]):
    """Result of one LLM parsing step: parsed records plus token usage."""

    data: List[T]  # successfully parsed records
    usage: Optional[dict]  # token usage reported by the model, if available
    errors: Optional[List[T]] = None  # records that failed parsing, if tracked by the caller
56 |
57 |
class DDLRecordResponseFormat(BaseModel):
    """Structured LLM response: a list of DDL records under ``items``."""

    items: List[DDLRecord]
60 |
61 |
class DMLRecordResponseFormat(BaseModel):
    """Structured LLM response: a list of DML records under ``items``."""

    items: List[DMLRecord]
64 |
65 |
class QueryRecordResponseFormat(BaseModel):
    """Structured LLM response: a list of query records under ``items``."""

    items: List[QueryRecord]
68 |
69 |
class DatabaseSchemaParse:
    """Converts raw schema/data/query text into structured records via an LLM."""

    def __init__(
        self,
        database_manager: DatabaseManager,
        model: Union[BaseModelBackend, List[BaseModelBackend]],
    ):
        """Create a parsing agent bound to the given model backend(s).

        Args:
            database_manager: Manager for the target database.
            model: One or more model backends used by the parsing agent.
        """
        self.database_manager = database_manager
        # Dedicated agent whose only job is translating text into structured JSON.
        self.parsing_agent = ChatAgent(
            system_message="You are a database expert, proficient in the "
            "SQL syntax of various databases.",
            model=model,
        )

    @timing
    def parse_ddl_record(self, text: str) -> SchemaParseResponse:
        """Parse a DDL script into ``DDLRecord`` entries (one per table).

        Args:
            text: The DDL script, optionally already wrapped in a ```sql fence.

        Returns:
            SchemaParseResponse with the parsed records and token usage.
        """
        prompt = (
            "Translate the following information into a JSON array format, "
            "with each JSON object in the array containing three "
            "elements: "
            "\"id\" for the table name, "
            "\"summary\" for a summary of the table, and "
            "\"sql\" for the SQL statement of the table creation.\n\n"
        )
        if text.startswith("```sql"):
            prompt += f"{text}\n\n"
        else:
            prompt += f"```sql\n{text}```\n\n"

        # Non-OpenAI models need the following explicit output-format snippet.
        prompt += textwrap.dedent(
            "Output Format:\n"
            "{"
            " \"items\":"
            " ["
            " {"
            " \"id\": \"\","
            " \"summary\": \"\","
            " \"sql\": \"\""
            " }"
            " ]"
            "}\n\n"
        )
        prompt += "Now, directly output the JSON array without explanation."
        response = self.parsing_agent.step(prompt, response_format=DDLRecordResponseFormat)
        ddl_record_response = DDLRecordResponseFormat.model_validate_json(response.msgs[0].content)
        return SchemaParseResponse(data=ddl_record_response.items, usage=response.info["usage"])

    @timing
    def parse_sampled_record(self, text: str) -> SchemaParseResponse:
        """Parse sampled table data (markdown) into ``DMLRecord`` entries.

        The input is split on H2 headings so each table section is parsed in
        its own LLM call; token usage is accumulated across calls and failed
        sections are logged and skipped (best effort).
        """
        data: List[DMLRecord] = []
        usage: Optional[dict] = None
        sections = self.split_markdown_by_h2(text)
        for section in sections:
            prompt = PromptTemplates.PARSE_SAMPLED_RECORD.replace("{{section}}", section)
            try:
                # Reset so previous sections don't leak into this call's context.
                self.parsing_agent.reset()
                response = self.parsing_agent.step(prompt, response_format=DMLRecordResponseFormat)
                dml_record_response = DMLRecordResponseFormat.model_validate_json(
                    response.msgs[0].content
                )
                data.extend(dml_record_response.items)
                # Accumulate token usage across per-section calls.
                if usage is None:
                    usage = response.info["usage"]
                else:
                    usage["completion_tokens"] += response.info["usage"]["completion_tokens"]
                    usage["prompt_tokens"] += response.info["usage"]["prompt_tokens"]
                    usage["total_tokens"] += response.info["usage"]["total_tokens"]
            except Exception as e:
                # Best effort: skip unparseable sections rather than fail the run.
                logger.error(f"Unable to process messages: {e}")
                logger.error(f"Prompt: {prompt}")
        return SchemaParseResponse(data=data, usage=usage)

    @timing
    def parse_query_record(self, text: str) -> SchemaParseResponse:
        """Extract question/SQL pairs from a query-analysis text.

        Returns:
            SchemaParseResponse with ``QueryRecord`` entries and token usage.
        """
        prompt = (
            "The following is an analysis of user query requirements, "
            "from which you need to extract user questions and "
            "corresponding SQL statements.\n\n"
        )
        prompt += f"```sql\n{text}```\n"
        prompt += "Please output the summary information and SQL script in JSON format."
        response = self.parsing_agent.step(prompt, response_format=QueryRecordResponseFormat)
        query_record_response = QueryRecordResponseFormat.model_validate_json(
            response.msgs[0].content
        )
        return SchemaParseResponse(data=query_record_response.items, usage=response.info["usage"])

    def split_markdown_by_h2(self, markdown_text: str) -> List[str]:
        """Split markdown into sections, each starting at an ``##`` heading."""
        sections = re.split(r'(?=^##\s+)', markdown_text, flags=re.MULTILINE)
        sections = [section.strip() for section in sections if section.strip()]
        return sections
164 |
--------------------------------------------------------------------------------
/camel_database_agent/database_agent.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Lei Zhang
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import json
16 | import logging
17 | import os
18 | import random
19 | from typing import Any, List, Optional, Union, cast
20 |
21 | from camel.agents import BaseAgent, ChatAgent
22 | from camel.embeddings import BaseEmbedding, OpenAIEmbedding
23 | from camel.models import BaseModelBackend, ModelFactory
24 | from camel.types import ModelPlatformType, ModelType
25 | from colorama import Fore
26 | from pydantic import BaseModel
27 | from tabulate import tabulate
28 |
29 | from camel_database_agent.database.dialect.dialect import (
30 | DatabaseSchemaDialect,
31 | )
32 | from camel_database_agent.database.manager import DatabaseManager
33 | from camel_database_agent.database.schema import (
34 | DatabaseSchemaParse,
35 | DDLRecord,
36 | DMLRecord,
37 | QueryRecord,
38 | SchemaParseResponse,
39 | )
40 | from camel_database_agent.database_base import (
41 | AssistantMessage,
42 | HumanMessage,
43 | MessageLog,
44 | MessageLogToEmpty,
45 | SQLExecutionError,
46 | TokenUsage,
47 | TrainLevel,
48 | messages_log,
49 | strip_sql_code_block,
50 | timing,
51 | )
52 | from camel_database_agent.database_prompt import PromptTemplates
53 | from camel_database_agent.datagen.pipeline import (
54 | DataQueryInferencePipeline,
55 | )
56 | from camel_database_agent.knowledge.knowledge import DatabaseKnowledge, RecordType
57 | from camel_database_agent.knowledge.knowledge_qdrant import (
58 | DatabaseKnowledgeQdrant,
59 | )
60 |
61 | logger = logging.getLogger(__name__)
62 |
63 |
class QuestionMeta(BaseModel):
    """A user question together with its generated SQL and generation metadata."""

    question: str  # original user question
    sql: str  # SQL produced by the model
    prompt: str  # full prompt sent to the model
    usage: dict  # token usage of the generation call
69 |
70 |
class DatabaseAgentResponse(BaseModel):
    """Outcome of ``DatabaseAgent.ask``: data on success, error details on failure."""

    ask: str  # the question that was asked
    dataset: Optional[Any] = None  # query result (DataFrame or list of dicts)
    sql: Optional[str] = None  # SQL that was executed (or attempted)
    success: bool = True  # False when execution failed
    error: Optional[str] = None  # error description when success is False
    usage: Optional[dict] = None  # token usage of the SQL-generation call
78 |
79 |
class DatabaseAgent(BaseAgent):
    """Natural-language-to-SQL agent over a relational database.

    The agent builds a vector knowledge base from the database schema,
    sampled rows and generated question/SQL pairs (``train_knowledge``),
    then answers questions by retrieving relevant context and asking the
    LLM to produce SQL (``ask``).
    """

    # Human-readable database summary, restored from disk or generated in training.
    database_summary: str = ""
    # Newline-separated example questions, restored from disk or generated in training.
    recommendation_question: str = ""

    def step(self, *args: Any, **kwargs: Any) -> Any:
        """Unused BaseAgent hook; interaction happens through ``ask``."""
        pass

    def reset(self, *args: Any, **kwargs: Any) -> Any:
        """Unused BaseAgent hook; the internal ChatAgent is reset per query."""
        pass

    def __init__(
        self,
        db_url: Optional[str] = None,
        ddl_sql: Optional[str] = None,
        data_sql: Optional[str] = None,
        polished_schema: bool = True,
        database_manager: Optional[DatabaseManager] = None,
        model: Optional[Union[BaseModelBackend, List[BaseModelBackend]]] = None,
        embedding_model: Optional[BaseEmbedding] = None,
        database_knowledge: Optional[DatabaseKnowledge] = None,
        data_path: Optional[str] = None,
        language: str = "English",
        interactive_mode: bool = False,
        read_only_model: bool = True,
    ):
        """Create the agent and restore any previously trained artifacts.

        Args:
            db_url: Database URL; required unless ``database_manager`` is given.
            ddl_sql: Pre-supplied schema DDL; otherwise loaded from the
                workspace or regenerated during training.
            data_sql: Pre-supplied sampled data; same fallback as ``ddl_sql``.
            polished_schema: Use the LLM-commented ("polished") schema.
            database_manager: Existing manager to reuse instead of ``db_url``.
            model: Model backend(s); defaults to the platform default model.
            embedding_model: Embedding backend; defaults to ``OpenAIEmbedding``.
            database_knowledge: Knowledge store; defaults to a local Qdrant store.
            data_path: Workspace directory; defaults to
                ``~/camel_database_agent_data``.
            language: Language for generated comments/questions.
            interactive_mode: Adjusts progress logging for interactive use.
            read_only_model: Reject data/schema-modifying SQL when True.

        Raises:
            ValueError: If neither ``db_url`` nor ``database_manager`` is given.
        """
        self.interactive_mode = interactive_mode
        if database_manager:
            self.database_manager = database_manager
        else:
            if db_url:
                self.database_manager = DatabaseManager(
                    db_url=db_url, read_only_model=read_only_model
                )
            else:
                raise ValueError("db_url or database_manager must be provided")
        self.ddl_sql = ddl_sql
        self.data_sql = data_sql
        self.language = language
        if not data_path:
            user_home = os.path.expanduser("~")
            data_path = os.path.join(user_home, "camel_database_agent_data")
        if not os.path.exists(data_path):
            os.makedirs(data_path)
        logger.info(f"Workspace: {data_path}")
        self.data_path = data_path
        self.polished_schema = polished_schema
        self.model_backend = (
            model
            if model
            else ModelFactory.create(
                model_platform=ModelPlatformType.DEFAULT,
                model_type=ModelType.DEFAULT,
            )
        )
        self.embedding_model_backend = embedding_model if embedding_model else OpenAIEmbedding()
        self.knowledge_path = os.path.join(str(self.data_path), "knowledge")
        self.database_knowledge_backend = (
            database_knowledge
            if database_knowledge
            else DatabaseKnowledgeQdrant(
                embedding=self.embedding_model_backend,
                model=self.model_backend,
                path=self.knowledge_path,
            )
        )
        self.dialect: DatabaseSchemaDialect = DatabaseSchemaDialect.get_dialect(
            dialect_name=self.database_manager.dialect_name(),
            database_manager=self.database_manager,
            model=self.model_backend,
        )
        self.schema_parse = DatabaseSchemaParse(
            database_manager=self.database_manager, model=self.model_backend
        )
        self.agent = ChatAgent(
            system_message="You are a business expert, skilled at in-depth "
            "analysis of user data query requirements through "
            "reverse engineering of database table structures.",
            model=self.model_backend,
            message_window_size=100,
        )
        # Restore cached artifacts from a previous training run, if present.
        if os.path.exists(os.path.join(self.knowledge_path, "database_summary.txt")):
            with open(
                os.path.join(self.knowledge_path, "database_summary.txt"),
                "r",
                encoding="utf-8",
            ) as f:
                self.database_summary = f.read()
        if os.path.exists(os.path.join(self.knowledge_path, "recommendation_question.txt")):
            with open(
                os.path.join(self.knowledge_path, "recommendation_question.txt"),
                "r",
                encoding="utf-8",
            ) as f:
                self.recommendation_question = f.read()
        if self.ddl_sql is None and os.path.exists(
            os.path.join(self.knowledge_path, "ddl_sql.sql")
        ):
            with open(
                os.path.join(self.knowledge_path, "ddl_sql.sql"),
                "r",
                encoding="utf-8",
            ) as f:
                self.ddl_sql = f.read()
        if self.data_sql is None and os.path.exists(
            os.path.join(self.knowledge_path, "data_sql.sql")
        ):
            with open(
                os.path.join(self.knowledge_path, "data_sql.sql"),
                "r",
                encoding="utf-8",
            ) as f:
                self.data_sql = f.read()

    @timing
    def _parse_schema_to_knowledge(self, polish: bool = False) -> TokenUsage:
        """Parse the schema into DDL records and add them to the knowledge base.

        Args:
            polish: Use the LLM-commented schema instead of the raw one.

        Returns:
            Token usage of the parsing step.
        """
        self.ddl_sql = (
            self.dialect.get_polished_schema(self.language)
            if polish
            else self.dialect.get_schema()
        )
        # Save the original (unpolished) schema to a file.
        with open(
            os.path.join(self.knowledge_path, "ddl_origin.sql"),
            "w",
            encoding="utf-8",
        ) as f:
            f.write(self.dialect.get_schema())

        # Save the (possibly polished) schema to a file.
        with open(
            os.path.join(self.knowledge_path, "ddl_sql.sql"),
            "w",
            encoding="utf-8",
        ) as f:
            f.write(self.ddl_sql)

        schema_parse_response: SchemaParseResponse = self.schema_parse.parse_ddl_record(
            self.ddl_sql
        )
        with open(
            os.path.join(self.knowledge_path, "ddl_records.json"),
            "w",
            encoding="utf-8",
        ) as f:
            f.write(
                json.dumps(
                    [record.model_dump() for record in schema_parse_response.data],
                    ensure_ascii=False,
                    indent=4,
                )
            )

        self.database_knowledge_backend.add(schema_parse_response.data)
        if schema_parse_response.usage is None:
            return TokenUsage(completion_tokens=0, prompt_tokens=0, total_tokens=0)
        return TokenUsage(
            completion_tokens=schema_parse_response.usage["completion_tokens"],
            prompt_tokens=schema_parse_response.usage["prompt_tokens"],
            total_tokens=schema_parse_response.usage["total_tokens"],
        )

    @timing
    def _parse_sampled_data_to_knowledge(self, data_samples_size: int = 5) -> TokenUsage:
        """Sample table data, parse it into DML records and add to knowledge.

        Args:
            data_samples_size: Number of rows to sample per table.

        Returns:
            Token usage of the parsing step.
        """
        self.data_sql = self.dialect.get_sampled_data(data_samples_size=data_samples_size)
        with open(
            os.path.join(self.knowledge_path, "data_sql.sql"),
            "w",
            encoding="utf-8",
        ) as f:
            f.write(self.data_sql)

        schema_parse_response: SchemaParseResponse = self.schema_parse.parse_sampled_record(
            self.data_sql
        )

        with open(
            os.path.join(self.knowledge_path, "data_records.json"),
            "w",
            encoding="utf-8",
        ) as f:
            f.write(
                json.dumps(
                    [record.model_dump() for record in schema_parse_response.data],
                    ensure_ascii=False,
                    indent=4,
                )
            )

        self.database_knowledge_backend.add(schema_parse_response.data)
        if schema_parse_response.usage is None:
            return TokenUsage(completion_tokens=0, prompt_tokens=0, total_tokens=0)
        return TokenUsage(
            completion_tokens=schema_parse_response.usage["completion_tokens"],
            prompt_tokens=schema_parse_response.usage["prompt_tokens"],
            total_tokens=schema_parse_response.usage["total_tokens"],
        )

    @timing
    def _parse_query_to_knowledge(self, query_samples_size: int = 20) -> TokenUsage:
        """Generate question/SQL pairs and add them to the knowledge base.

        Args:
            query_samples_size: Minimum number of pairs to generate.

        Returns:
            Token usage accumulated across all generation rounds.

        Raises:
            ValueError: If ``ddl_sql``/``data_sql`` are not available yet.
        """
        if self.ddl_sql and self.data_sql:
            pipeline = DataQueryInferencePipeline(
                ddl_sql=self.ddl_sql,
                data_sql=self.data_sql,
                model=self.model_backend,
                database_manager=self.database_manager,
                language=self.language,
            )
            query_records: List[QueryRecord] = []
            usage: Optional[dict] = None
            # Keep generating until we have enough samples; each round may
            # return fewer records than requested.
            while len(query_records) < query_samples_size:
                schema_parse_response: SchemaParseResponse = pipeline.generate(
                    query_samples_size=query_samples_size
                )
                # BUG FIX: accumulate usage across rounds instead of keeping
                # only the last round's numbers.
                if schema_parse_response.usage is not None:
                    if usage is None:
                        usage = dict(schema_parse_response.usage)
                    else:
                        usage["completion_tokens"] += schema_parse_response.usage[
                            "completion_tokens"
                        ]
                        usage["prompt_tokens"] += schema_parse_response.usage["prompt_tokens"]
                        usage["total_tokens"] += schema_parse_response.usage["total_tokens"]
                query_records.extend(cast(List[QueryRecord], schema_parse_response.data))
            with open(
                os.path.join(self.knowledge_path, "question_sql.txt"),
                "w",
                encoding="utf-8",
            ) as f:
                for query_record in query_records:
                    f.write(f"QUESTION: {query_record.question}\nSQL: {query_record.sql}\n\n")

            self.database_knowledge_backend.add(query_records)
            if usage is None:
                return TokenUsage(completion_tokens=0, prompt_tokens=0, total_tokens=0)
            return TokenUsage(
                completion_tokens=usage["completion_tokens"],
                prompt_tokens=usage["prompt_tokens"],
                total_tokens=usage["total_tokens"],
            )
        else:
            raise ValueError("ddl_sql and data_sql must be provided")

    @timing
    def _generate_database_summary(self, query_samples_size: int) -> TokenUsage:
        """Generate and persist the database summary and recommended questions.

        Args:
            query_samples_size: Number of sample questions to use as
                recommendations.

        Returns:
            Token usage of the summary-generation call.
        """
        # BUG FIX: the condition was inverted (``if not self.polished_schema``),
        # selecting the raw schema when polishing was requested and vice versa.
        # Keep it consistent with _parse_schema_to_knowledge.
        self.ddl_sql = (
            self.dialect.get_polished_schema(self.language)
            if self.polished_schema
            else self.dialect.get_schema()
        )
        query_samples: List[QueryRecord] = (
            self.database_knowledge_backend.get_query_collection_sample(query_samples_size)
        )

        prompt = PromptTemplates.DATABASE_SUMMARY_OUTPUT_EXAMPLE
        prompt = prompt.replace("{{ddl_sql}}", self.ddl_sql)
        prompt = prompt.replace("{{language}}", self.language)

        response = self.agent.step(prompt)
        self.database_summary = response.msgs[0].content
        with open(
            os.path.join(self.knowledge_path, "database_summary.txt"),
            "w",
            encoding="utf-8",
        ) as f:
            f.write(self.database_summary)
        self.recommendation_question = "\n".join(
            [query_sample.question for query_sample in query_samples]
        )
        with open(
            os.path.join(self.knowledge_path, "recommendation_question.txt"),
            "w",
            encoding="utf-8",
        ) as f:
            f.write(self.recommendation_question)

        return TokenUsage(
            completion_tokens=response.info['usage']["completion_tokens"],
            prompt_tokens=response.info['usage']["prompt_tokens"],
            total_tokens=response.info['usage']["total_tokens"],
        )

    def get_summary(self) -> str:
        """Return the cached database summary (empty until trained)."""
        return self.database_summary

    def get_recommendation_question(self, sampled_num: int = 5) -> str:
        """
        Returns a string with randomly sampled questions from the recommendation_question list.

        Args:
            sampled_num: Number of questions to sample

        Returns:
            A string with sampled questions (one per line)
        """
        questions = self.recommendation_question.strip().split('\n')

        # Ensure we don't try to sample more questions than available
        sampled_num = min(sampled_num, len(questions))

        # Randomly sample questions
        sampled_questions = random.sample(questions, sampled_num)

        return '\n'.join(sampled_questions)

    def add_knowledge(self, records: List[RecordType]) -> None:
        """Add externally supplied records to the knowledge base."""
        self.database_knowledge_backend.add(records)

    @timing
    def train_knowledge(
        self,
        level: TrainLevel = TrainLevel.MEDIUM,
        reset_train: bool = False,
    ) -> TokenUsage:
        """Build (or rebuild) the knowledge base from the database.

        Args:
            level: Controls how many question/SQL samples are generated per table.
            reset_train: Clear existing knowledge and retrain from scratch.

        Returns:
            Aggregated token usage across all training steps.
        """
        data_samples_size = 20
        table_count = len(self.dialect.get_table_names())
        # Scale generated query samples with database size and training level.
        query_samples_size = table_count
        if level == TrainLevel.LOW:
            query_samples_size = table_count * 2
        elif level == TrainLevel.MEDIUM:
            query_samples_size = table_count * 5
        elif level == TrainLevel.HIGH:
            query_samples_size = table_count * 10

        if reset_train and os.path.exists(self.knowledge_path):
            self.database_knowledge_backend.clear()
            self.ddl_sql = None
            self.data_sql = None
            self.database_summary = ""
            self.recommendation_question = ""
            logger.info("Reset knowledge...")

        if (
            self.database_knowledge_backend.get_table_collection_size() == 0
            or self.database_knowledge_backend.get_data_collection_size() == 0
            or self.database_knowledge_backend.get_query_collection_size() == 0
        ):
            message = (
                f"Initial knowledge base training on {table_count} tables. "
                f"It will take {Fore.GREEN}about {int(table_count * 28 / 60)} minutes.{Fore.RESET}"
            )
            if self.interactive_mode:
                logger.info(f"\r{message}")
            else:
                logger.info(message)

        token_usage: TokenUsage = TokenUsage()

        # Each stage is skipped when its collection is already populated.
        if self.database_knowledge_backend.get_table_collection_size() == 0:
            token_usage.add_token(self._parse_schema_to_knowledge(polish=self.polished_schema))

        if self.database_knowledge_backend.get_data_collection_size() == 0:
            token_usage.add_token(
                self._parse_sampled_data_to_knowledge(data_samples_size=data_samples_size)
            )

        if self.database_knowledge_backend.get_query_collection_size() == 0:
            token_usage.add_token(self._parse_query_to_knowledge(query_samples_size))

        if not self.database_summary or reset_train:
            token_usage.add_token(
                self._generate_database_summary(query_samples_size=query_samples_size)
            )

        return token_usage

    @timing
    def question_to_sql(self, question: str, dialect_name: str) -> QuestionMeta:
        """Generate SQL for a question using retrieved knowledge as context.

        Args:
            question: Natural-language question.
            dialect_name: Target SQL dialect (e.g. ``sqlite``).

        Returns:
            QuestionMeta with the generated SQL, full prompt and token usage.
        """
        prompt = PromptTemplates.QUESTION_CONVERT_SQL.replace("{{dialect_name}}", dialect_name)

        # Retrieve relevant schema, sample data and question/SQL examples.
        ddl_records: List[DDLRecord] = self.database_knowledge_backend.query_ddl(question)
        prompt = prompt.replace(
            "{{table_schema}}", "\n".join([record.sql for record in ddl_records])
        )

        data_records: List[DMLRecord] = self.database_knowledge_backend.query_data(question)
        prompt = prompt.replace(
            "{{sample_data}}", "\n".join([record.dataset for record in data_records])
        )

        query_records: List[QueryRecord] = self.database_knowledge_backend.query_query(question)
        prompt = prompt.replace(
            "{{qa_pairs}}",
            "\n".join(
                [f"QUESTION: {record.question}\nSQL: {record.sql}\n\n" for record in query_records]
            ),
        )

        prompt = prompt.replace("{{question}}", question)
        logger.debug(Fore.GREEN + "PROMPT:" + prompt)
        # Reset so each question is answered without prior conversation state.
        self.agent.reset()
        response = self.agent.step(prompt)

        return QuestionMeta(
            question=question,
            sql=strip_sql_code_block(response.msgs[0].content),
            prompt=prompt,
            usage=response.info['usage'],
        )

    @messages_log
    def ask(
        self,
        session_id: str,
        question: str,
        message_log: Optional[MessageLog] = None,
        bind_pd: Optional[bool] = True,
    ) -> DatabaseAgentResponse:
        """Answer a question by generating and executing SQL.

        Args:
            session_id: Conversation identifier used in message logs.
            question: Natural-language question.
            message_log: Optional sink for the exchanged messages.
            bind_pd: Return the result as a pandas DataFrame when True.

        Returns:
            DatabaseAgentResponse; ``success`` is False when SQL generation
            produced a statement that failed or was rejected.
        """
        if not message_log:
            message_log = MessageLogToEmpty()
        message_log.messages_writer(HumanMessage(session_id=session_id, content=question))
        question_meta = self.question_to_sql(
            question=question,
            dialect_name=self.database_manager.dialect_name(),
        )
        try:
            message_log.messages_writer(
                AssistantMessage(session_id=session_id, content=question_meta.sql)
            )
            dataset = self.database_manager.select(sql=question_meta.sql, bind_pd=bind_pd)
            message_log.messages_writer(
                AssistantMessage(
                    session_id=session_id,
                    content=tabulate(dataset, headers="keys", tablefmt="psql"),
                )
            )
            return DatabaseAgentResponse(
                ask=question,
                dataset=dataset,
                sql=question_meta.sql,
                usage=question_meta.usage,
            )
        except SQLExecutionError as e:
            message_log.messages_writer(AssistantMessage(session_id=session_id, content=str(e)))
            return DatabaseAgentResponse(
                ask=question,
                dataset=None,
                sql=e.sql,
                success=False,
                error=e.error_message,
                usage=question_meta.usage,
            )
        except Exception as e:
            message_log.messages_writer(AssistantMessage(session_id=session_id, content=str(e)))
            return DatabaseAgentResponse(
                ask=question,
                dataset=None,
                sql=question_meta.sql,
                success=False,
                error=str(e),
                # FIX: report token usage on unexpected errors too, matching
                # the SQLExecutionError branch.
                usage=question_meta.usage,
            )
527 |
--------------------------------------------------------------------------------
/camel_database_agent/database_base.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Lei Zhang
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
import logging
import os
import sys
import threading
import time
from abc import ABC, abstractmethod
from asyncio import Event
from enum import Enum
from functools import wraps
from itertools import cycle
from threading import Thread
from typing import Any, Callable, TypeVar, cast

from colorama import Fore
from pydantic import BaseModel
29 |
30 | logger = logging.getLogger(__name__)
31 |
32 |
class TrainLevel(Enum):
    """Supported training levels (low / medium / high)."""

    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
39 |
40 |
class TokenUsage(BaseModel):
    """Running totals of LLM token consumption."""

    completion_tokens: int = 0
    prompt_tokens: int = 0
    total_tokens: int = 0

    def add_token(self, usage: "TokenUsage"):
        """Fold the counts from *usage* into this instance, field by field."""
        for name in ("completion_tokens", "prompt_tokens", "total_tokens"):
            setattr(self, name, getattr(self, name) + getattr(usage, name))
50 |
51 |
class Message(BaseModel):
    """Base chat message written to the per-session message log."""

    # Identifier of the conversation this message belongs to.
    session_id: str
    # Speaker role, e.g. "user" or "assistant" (see subclasses).
    role: str
    # Raw message text.
    content: str
56 |
57 |
class HumanMessage(Message):
    """Message authored by the end user; role fixed to "user"."""

    role: str = "user"
60 |
61 |
class AssistantMessage(Message):
    """Message produced by the agent; role fixed to "assistant"."""

    role: str = "assistant"
64 |
65 |
class MessageLog(ABC):
    """Abstract sink for chat messages; subclasses decide how to persist."""

    @abstractmethod
    def messages_writer(self, message: Message) -> None:
        """Record one message.

        Args:
            message: The message to persist.
        """
        raise NotImplementedError
70 |
71 |
class MessageLogToEmpty(MessageLog):
    """No-op log used when callers do not want messages recorded."""

    def messages_writer(self, message: Message) -> None:
        """Discard the message."""
        pass
75 |
76 |
class MessageLogToFile(MessageLog):
    """Log that appends each message to a file as one JSON line (JSONL)."""

    def __init__(self, f: Any):
        # File-like object opened by the caller; the caller owns its lifecycle.
        self.f = f

    def messages_writer(self, message: Message) -> None:
        """Serialize the message to JSON and append it as a single line."""
        line = message.model_dump_json() + "\n"
        self.f.write(line)
83 |
84 |
class SQLExecutionError(Exception):
    """Raised when executing a SQL statement against the database fails.

    Attributes:
        sql: The SQL statement that triggered the failure.
        error_message: Human-readable description of the failure.
    """

    def __init__(self, sql: str, error_message: str):
        self.sql = sql
        self.error_message = error_message
        details = f"SQL execution error: {error_message}\nSQL: {sql}"
        super().__init__(details)
97 |
98 |
99 | T = TypeVar("T", bound=Callable[..., Any])
100 |
101 |
def spinner(stop_event, message=""):
    """Animate a braille spinner on stdout until *stop_event* is set.

    Args:
        stop_event: Event-like object; once ``is_set()`` is true the loop ends.
        message: Text displayed to the left of the spinner glyph.
    """
    frames = cycle(['⣾', '⣽', '⣻', '⢿', '⡿', '⣟', '⣯', '⣷'])
    while not stop_event.is_set():
        frame = next(frames)
        sys.stdout.write(f"\r{Fore.LIGHTGREEN_EX}{message}{frame}{Fore.RESET}")
        sys.stdout.flush()
        time.sleep(0.1)
    # Blank out the spinner line so later output starts on a clean line.
    sys.stdout.write('\r' + ' ' * 100 + '\r')
    sys.stdout.flush()
113 |
114 |
def timing(func: T) -> T:
    """Decorator that logs the wall-clock duration of *func* while showing a
    console spinner for the duration of the call.

    The log label is the wrapped function's docstring when present,
    otherwise its name.
    """

    @wraps(func)
    def timing_wrapper(*args: Any, **kwargs: Any) -> Any:
        info = func.__doc__ or func.__name__
        start_time = time.perf_counter()

        # Fix: use threading.Event as the cross-thread stop flag. The original
        # used asyncio.Event, which is an event-loop primitive and is not
        # meant for signalling between OS threads.
        stop_spinner = threading.Event()
        spinner_thread = Thread(target=spinner, args=(stop_spinner, "Thinking..."))
        # Daemon so a crashed caller never hangs interpreter shutdown.
        spinner_thread.daemon = True
        try:
            spinner_thread.start()
            result = func(*args, **kwargs)
        finally:
            # Always stop the spinner and log timing, even when func raises.
            stop_spinner.set()
            spinner_thread.join()
            end_time = time.perf_counter()
            total_time = end_time - start_time
            logger.info(f"\r{info} Took {Fore.GREEN}{total_time:.4f} seconds{Fore.RESET}")
        return result

    return cast(T, timing_wrapper)
140 |
141 |
def messages_log(func: T) -> T:
    """Decorator that injects a ``message_log`` kwarg backed by a per-session
    ``messages.jsonl`` file under ``<data_path>/<session_id>/``."""

    @wraps(func)
    def wrapper(*args: Any, **kwargs: Any) -> Any:
        # args[0] is expected to be the instance carrying ``data_path``.
        session_id = str(kwargs.get("session_id", "default"))
        session_path = os.path.join(str(args[0].data_path), session_id)
        if not os.path.exists(session_path):
            os.makedirs(session_path, exist_ok=True)

        log_path = os.path.join(session_path, "messages.jsonl")
        with open(log_path, "a", encoding="utf-8") as f:
            kwargs["message_log"] = MessageLogToFile(f)
            return func(*args, **kwargs)

    return cast(T, wrapper)
156 |
157 |
def strip_sql_code_block(sql: str) -> str:
    """Remove Markdown code-fence delimiters from an LLM-produced SQL string.

    Handles both a ```sql opening fence and a bare ``` fence (models emit
    either form — the response parser in the datagen pipeline already treats
    both), plus a trailing ``` fence.

    Args:
        sql: Raw model output that may be wrapped in a Markdown code block.

    Returns:
        The SQL text with fences and surrounding whitespace removed.
    """
    sql = sql.strip()
    if sql.startswith("```sql"):
        sql = sql[6:]
    elif sql.startswith("```"):
        # Fix: previously a bare ``` opening fence was left in place.
        sql = sql[3:]
    if sql.endswith("```"):
        sql = sql[:-3]
    return sql.strip()  # Extra strip removes whitespace left by the fences
166 |
--------------------------------------------------------------------------------
/camel_database_agent/database_prompt.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Lei Zhang
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import textwrap
16 |
17 |
class PromptTemplates:
    """Prompt templates used by the database agent.

    Placeholders such as {{language}}, {{ddl_sql}}, {{dialect_name}},
    {{table_schema}}, {{sample_data}}, {{qa_pairs}} and {{question}} are
    substituted via ``str.replace`` before the prompt is sent to the model.
    """

    # Asks the model for a business-level natural-language summary of the
    # schema, written in the requested output language.
    DATABASE_SUMMARY_OUTPUT_EXAMPLE = textwrap.dedent("""
You are a business database expert. Please generate a {{language}} database summary based on the following table structure, with the aim of helping people understand what information this database can provide from a business perspective.

## Table Schema
{{ddl_sql}}

## Output Example

This database is the core data model of a typical e-commerce system,
including modules for user management, product management, order transactions,
payment processes, and address management.

It achieves a complete business loop through multi-table associations
(such as user-order-product-payment), supporting users throughout
the entire process from registration, browsing products,
placing orders and making payments to receiving goods.

Each table ensures data consistency through foreign key constraints
(such as the strong association between orders and users or addresses)
and includes timestamp fields (`created_at`/`updated_at`) for tracking data changes.

Now, You only need to output a descriptive text in {{language}}.
""")

    # Text-to-SQL prompt: schema + sample rows + few-shot pairs + question.
    # Fix: the intro previously hardcoded "comply with sqlite syntax" even
    # though the template is dialect-parameterized ({{dialect_name}}) and
    # instruction 1 already says "Follow {{dialect_name}} syntax".
    QUESTION_CONVERT_SQL = textwrap.dedent("""
The following is the table structure in the database and some common query SQL statements. Please convert the user's question into an SQL query statement. Note to comply with {{dialect_name}} syntax. Do not explain, just provide the SQL directly.

Database System: {{dialect_name}}

## Table Schema
```sql
{{table_schema}}
```

## Data Example
```sql
{{sample_data}}
```
## Few-Shot Example
{{qa_pairs}}

## User Question
{{question}}

## Instructions
1. Follow {{dialect_name}} syntax
2. Do not provide explanations, just give the SQL statement directly
""")
67 |
--------------------------------------------------------------------------------
/camel_database_agent/datagen/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Lei Zhang
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
--------------------------------------------------------------------------------
/camel_database_agent/datagen/pipeline.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Lei Zhang
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import logging
16 | from typing import List, Optional, Union
17 |
18 | from camel.agents import ChatAgent
19 | from camel.models import BaseModelBackend, ModelFactory
20 | from camel.types import ModelPlatformType, ModelType
21 | from colorama import Fore
22 |
23 | from camel_database_agent.core.exceptions import QueryParsingError
24 | from camel_database_agent.database.manager import DatabaseManager
25 | from camel_database_agent.database.schema import (
26 | QueryRecord,
27 | QueryRecordResponseFormat,
28 | SchemaParseResponse,
29 | )
30 | from camel_database_agent.database_base import SQLExecutionError, timing
31 | from camel_database_agent.datagen.prompts import PromptTemplates
32 |
33 | logger = logging.getLogger(__name__)
34 |
35 |
class DataQueryInferencePipeline:
    """Generates validated question/SQL sample pairs for a database.

    The pipeline prompts an LLM with the schema (DDL) and sample data (DML),
    parses the JSON response into ``QueryRecord`` items, executes each SQL
    against the live database, and keeps only statements that run
    successfully, retrying until enough valid samples are collected.
    """

    def __init__(
        self,
        ddl_sql: str,
        data_sql: str,
        database_manager: DatabaseManager,
        model: Optional[Union[BaseModelBackend, List[BaseModelBackend]]] = None,
        language: str = "English",
        prompt_templates: Optional[PromptTemplates] = None,
    ):
        """Initialize the pipeline.

        Args:
            ddl_sql: CREATE TABLE statements describing the schema.
            data_sql: Sample INSERT statements shown to the model.
            database_manager: Executes candidate SQL for validation.
            model: Backend(s) to use; defaults to the platform default model.
            language: Output language for the generated questions.
            prompt_templates: Optional overrides for the generation prompt.
        """
        self.model_backend = (
            model
            if model
            else ModelFactory.create(
                model_platform=ModelPlatformType.DEFAULT,
                model_type=ModelType.DEFAULT,
            )
        )
        self.ddl_sql = ddl_sql
        self.data_sql = data_sql
        self.database_manager = database_manager
        self.prompt_templates = prompt_templates or PromptTemplates()
        self.question_agent = ChatAgent(
            system_message="You are a business expert, skilled at deeply "
            "analyzing user data query requirements based on "
            "database table structures.",
            # Bug fix: pass the resolved backend. Previously the raw ``model``
            # argument was forwarded, so the default backend created above was
            # never used when ``model`` was None.
            model=self.model_backend,
            output_language=language,
        )

    def _prepare_prompt(self, query_samples_needed: int) -> str:
        """Fill the generation prompt template with schema, data and sizing."""
        prompt = self.prompt_templates.QUESTION_INFERENCE_PIPELINE
        prompt = prompt.replace("{{ddl_sql}}", self.ddl_sql)
        prompt = prompt.replace("{{data_sql}}", self.data_sql)
        prompt = prompt.replace("{{query_samples_size}}", str(query_samples_needed))
        prompt = prompt.replace("{{dialect_name}}", self.database_manager.dialect_name())
        return prompt

    def _parse_response_content(self, content: str) -> List[QueryRecord]:
        """Parse a model response into QueryRecords.

        Strips an optional Markdown code fence before validating the JSON
        payload against ``QueryRecordResponseFormat``.

        Raises:
            QueryParsingError: If the content is not valid JSON of the
                expected shape.
        """
        if content.startswith("```json") or content.startswith("```"):
            content = content.split("\n", 1)[1]  # Drop the opening fence line
        if content.endswith("```"):
            content = content.rsplit("\n", 1)[0]  # Drop the closing fence line

        try:
            structured_response = QueryRecordResponseFormat.model_validate_json(content)
            return structured_response.items
        except Exception as e:
            raise QueryParsingError(f"Failed to parse response: {e!s}")

    def _validate_query(self, query_record: QueryRecord) -> bool:
        """Return True if the record's SQL executes without error."""
        try:
            self.database_manager.select(query_record.sql)
            return True
        except SQLExecutionError as e:
            logger.debug(f"{Fore.RED}SQLExecutionError{Fore.RESET}: {e.sql} {e.error_message}")
            return False
        except Exception as e:
            logger.error(
                f"An error occurred while executing the query: "
                f"{query_record.question} {query_record.sql} {e!s}"
            )
            return False

    @timing
    def generate(
        self, query_samples_size: int = 20, max_rounds: Optional[int] = None
    ) -> SchemaParseResponse:
        """Data generation for samples"""
        # NOTE: the docstring above doubles as the @timing log label; keep it
        # short and unchanged.
        #
        # Args:
        #     query_samples_size: Number of validated samples to collect.
        #     max_rounds: Optional cap on LLM calls. ``None`` (default) keeps
        #         the original retry-forever behavior; a number guards
        #         against an infinite loop when the model never yields
        #         valid SQL.

        dataset: List[QueryRecord] = []
        usage: Optional[dict] = None  # Usage of the most recent LLM call only
        error_query_records: List[QueryRecord] = []
        rounds = 0

        while len(dataset) < query_samples_size:
            if max_rounds is not None and rounds >= max_rounds:
                logger.warning(
                    f"Stopped after {rounds} rounds with "
                    f"{len(dataset)}/{query_samples_size} valid samples"
                )
                break
            rounds += 1
            try:
                # Only ask for as many samples as are still missing.
                samples_needed = query_samples_size - len(dataset)
                prompt = self._prepare_prompt(samples_needed)

                response = self.question_agent.step(
                    prompt, response_format=QueryRecordResponseFormat
                )
                if response.info and 'usage' in response.info:
                    usage = response.info['usage']
                content = response.msgs[0].content.strip()

                # Analyze response content
                query_records = self._parse_response_content(content)

                # Keep executable queries; stash the rest for error reporting.
                for item in query_records:
                    if self._validate_query(item):
                        dataset.append(item)
                        logger.info(
                            f"Sample collection progress: "
                            f"{Fore.GREEN}{len(dataset)}/{query_samples_size}{Fore.RESET}"
                        )
                    else:
                        error_query_records.append(item)

            except QueryParsingError as e:
                logger.error(f"Failed to parse response: {e!s}")
            except Exception as e:
                logger.error(f"An unexpected error occurred while generating the sample: {e!s}")

        return SchemaParseResponse(
            data=dataset[:query_samples_size],
            usage=usage,
            errors=error_query_records if error_query_records else None,
        )
151 |
--------------------------------------------------------------------------------
/camel_database_agent/datagen/prompts.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Lei Zhang
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import textwrap
16 |
17 |
class PromptTemplates:
    """Prompt templates for the sample-generation (datagen) pipeline.

    The {{ddl_sql}}, {{data_sql}}, {{query_samples_size}} and
    {{dialect_name}} placeholders are substituted via ``str.replace`` by
    ``DataQueryInferencePipeline._prepare_prompt``.
    """

    # Asks the model to produce question/SQL pairs as a bare JSON object
    # shaped like QueryRecordResponseFormat: {"items": [{id, question, sql}]}.
    QUESTION_INFERENCE_PIPELINE = textwrap.dedent("""
# JSON Format Request

You are a specialized JSON generator. Your only function is to parse the provided data and convert it to JSON format, strictly following the format requirements.

## Database Schema:
```
{{ddl_sql}}
```

## Data Example:
```sql
{{data_sql}}
```

## Instructions:
Database System: {{dialect_name}}
1. Please carefully analyze the following database information and conduct an in-depth analysis from a business perspective. What business query questions might users raise? Please fully consider some complex query scenarios, including but not limited to multi-table associations, grouping statistics, etc.
2. Please ensure that the SQL you write conforms to {{dialect_name}} syntax.
3. Generate {{query_samples_size}} real user query questions along with the corresponding SQL query statements without using placeholders
4. Create a JSON array with each table as an object
5. Each object must have exactly three fields:
- "id": the table name
- "question": a query in natural language.
- "sql": sql statements without placeholders.
6. The entire response must be ONLY valid JSON without any additional text, explanation, or markdown code blocks

## Required Output Format:
{
"items":[{
"id": "",
"question": "",
"sql": ""
}]
}

## IMPORTANT:
- Your response must contain ONLY the JSON object, nothing else
- Do not include explanations, introductions, or conclusions
- Do not use markdown code blocks (```) around the JSON
- Do not include phrases like "Here's the JSON" or "I've created the JSON"
- Do not indicate that you are providing the output in any way.""")
61 |
--------------------------------------------------------------------------------
/camel_database_agent/knowledge/__init__.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Lei Zhang
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 |
--------------------------------------------------------------------------------
/camel_database_agent/knowledge/knowledge.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Lei Zhang
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | from abc import ABC, abstractmethod
16 | from typing import Any, Dict, Generic, List, Type, TypeVar, Union
17 |
18 | from camel.agents import ChatAgent
19 | from camel.embeddings import BaseEmbedding
20 | from camel.models import BaseModelBackend
21 | from camel.storages import (
22 | BaseVectorStorage,
23 | VectorDBQuery,
24 | VectorDBQueryResult,
25 | VectorRecord,
26 | )
27 |
28 | from camel_database_agent.core.exceptions import KnowledgeException
29 | from camel_database_agent.core.method_lru_cache import method_lru_cache
30 | from camel_database_agent.database.schema import (
31 | DDLRecord,
32 | DMLRecord,
33 | QueryRecord,
34 | )
35 |
36 | RecordType = TypeVar("RecordType", DDLRecord, DMLRecord, QueryRecord)
37 | T = TypeVar("T", DDLRecord, DMLRecord, QueryRecord)
38 |
39 |
class DatabaseKnowledge(ABC, Generic[T]):
    """Vector-store backed knowledge base for database metadata.

    Maintains three vector collections — table DDL summaries, sample-data
    (DML) summaries, and question/SQL pairs — and routes each record type to
    its own storage for embedding and similarity search.
    """

    def __init__(
        self,
        embedding: BaseEmbedding,
        model: Union[BaseModelBackend, List[BaseModelBackend]],
        table_storage: BaseVectorStorage,
        data_storage: BaseVectorStorage,
        query_storage: BaseVectorStorage,
        **data: Any,
    ):
        """Wire up the embedding backend, storages and the DDL-parsing agent.

        Args:
            embedding: Embedding backend used for both indexing and queries.
            model: Model backend(s) for the DDL-parsing chat agent.
            table_storage: Vector storage for DDL records.
            data_storage: Vector storage for DML records.
            query_storage: Vector storage for question/SQL records.
            **data: Forwarded to ``super().__init__``.
        """
        super().__init__(**data)
        self.embedding = embedding
        self.table_storage = table_storage
        self.data_storage = data_storage
        self.query_storage = query_storage
        self.ddl_parsing_agent = ChatAgent(
            system_message="You are a database expert, skilled at parsing "
            "DDL statements, extracting key information, and "
            "converting it into JSON format.",
            model=model,
            message_window_size=10,
        )

        # Maps each record type to the vector storage that holds it.
        self._storage_map: Dict[Type[RecordType], BaseVectorStorage] = {  # type: ignore[valid-type]
            DDLRecord: self.table_storage,
            DMLRecord: self.data_storage,
            QueryRecord: self.query_storage,
        }

        # Maps each record type to the attribute whose text gets embedded.
        self._embed_field_map: Dict[Type[RecordType], str] = {  # type: ignore[valid-type]
            DDLRecord: "summary",
            DMLRecord: "summary",
            QueryRecord: "question",
        }

    def add(self, records: List[T]) -> None:
        """Embed the records and add them to their type-specific storages.

        Raises:
            KnowledgeException: For unsupported record types or on storage
                failure.
        """
        # Group records by concrete type so each batch goes to one storage.
        grouped_records: Dict[Type[RecordType], List[RecordType]] = {}  # type: ignore[valid-type]
        for record in records:
            record_type = type(record)
            if record_type not in self._storage_map:
                raise KnowledgeException(f"不支持的记录类型: {record_type}")
            # Idiom: setdefault replaces the manual "if key missing" dance.
            grouped_records.setdefault(record_type, []).append(record)

        # Build vector records per type and bulk-add them to that storage.
        for record_type, type_records in grouped_records.items():
            storage = self._storage_map[record_type]
            embed_field = self._embed_field_map[record_type]

            try:
                v_records = [
                    VectorRecord(
                        vector=self.embedding.embed(getattr(record, embed_field)),
                        payload=record.model_dump(),  # type: ignore[attr-defined]
                    )
                    for record in type_records
                ]
                storage.add(v_records)
            except Exception as e:
                raise KnowledgeException(f"添加记录时发生错误: {e!s}")

    @method_lru_cache(maxsize=128)
    def _generic_query(self, query: str, record_type: Type[T], top_k: int = 8) -> List[T]:
        """Similarity-search one storage and rebuild typed records (cached).

        Raises:
            KnowledgeException: If no storage exists for the type or the
                query fails.
        """
        storage = self._storage_map.get(record_type)
        if not storage:
            raise KnowledgeException(f"未找到记录类型 {record_type.__name__} 的存储")

        try:
            query_vector = self.embedding.embed(query)
            vector_result: List[VectorDBQueryResult] = storage.query(
                VectorDBQuery(query_vector=query_vector, top_k=top_k)
            )

            # Rebuild typed records; results with no payload are skipped.
            records = []
            for result in vector_result:
                if result.record.payload is not None:
                    record: T = record_type(**result.record.payload)
                    records.append(record)
            return records
        except Exception as e:
            raise KnowledgeException(f"查询 {record_type.__name__} 时发生错误: {e!s}")

    def query_ddl(self, query: str, top_k: int = 8) -> List[DDLRecord]:
        """Return the DDL records most similar to *query*."""
        return self._generic_query(query, DDLRecord, top_k)

    def query_data(self, query: str, top_k: int = 8) -> List[DMLRecord]:
        """Return the sample-data (DML) records most similar to *query*."""
        return self._generic_query(query, DMLRecord, top_k)

    def query_query(self, query: str, top_k: int = 8) -> List[QueryRecord]:
        """Return the question/SQL records most similar to *query*."""
        return self._generic_query(query, QueryRecord, top_k)

    @abstractmethod
    def clear(self) -> None:
        """Remove all records from every storage."""
        raise NotImplementedError

    @abstractmethod
    def get_table_collection_size(self) -> int:
        """Number of records in the table (DDL) collection."""
        raise NotImplementedError

    @abstractmethod
    def get_data_collection_size(self) -> int:
        """Number of records in the data (DML) collection."""
        raise NotImplementedError

    @abstractmethod
    def get_query_collection_size(self) -> int:
        """Number of records in the query collection."""
        raise NotImplementedError

    @abstractmethod
    def get_query_collection_sample(self, n: int = 20) -> List[QueryRecord]:
        """Return up to *n* sampled query records."""
        raise NotImplementedError
165 |
--------------------------------------------------------------------------------
/camel_database_agent/knowledge/knowledge_qdrant.py:
--------------------------------------------------------------------------------
1 | # Copyright 2025 Lei Zhang
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 |
15 | import logging
16 | import random
17 | from typing import List, Optional, Union
18 |
19 | from camel.embeddings import BaseEmbedding
20 | from camel.models import BaseModelBackend
21 | from camel.storages import QdrantStorage
22 | from qdrant_client.conversions.common_types import CollectionInfo
23 |
24 | from camel_database_agent.database.schema import QueryRecord
25 | from camel_database_agent.knowledge.knowledge import DatabaseKnowledge
26 |
27 | logger = logging.getLogger(__name__)
28 |
29 |
class DatabaseKnowledgeQdrant(DatabaseKnowledge):
    """DatabaseKnowledge implementation backed by Qdrant vector collections.

    Uses three collections — ``table_documents``, ``data_documents`` and
    ``query_documents`` — stored on disk at *path*, or in memory when no
    path is given.
    """

    def __init__(
        self,
        embedding: BaseEmbedding,
        model: Union[BaseModelBackend, List[BaseModelBackend]],
        path: Optional[str] = None,
    ):
        """Create (or open) the three Qdrant collections.

        Raises:
            ValueError: If an existing collection was built with a different
                vector dimension than the current embedding produces.
        """
        self.path = path
        vector_dim = embedding.get_output_dim()
        try:
            # One collection per record family (DDL / DML / query pairs).
            table_storage = self._create_storage(vector_dim, "table_documents", path)
            data_storage = self._create_storage(vector_dim, "data_documents", path)
            query_storage = self._create_storage(vector_dim, "query_documents", path)
        except ValueError as e:
            logger.error(
                "Adjust your embedding model to output vectors with "
                "the same dimensions as the existing collection. "
                "Alternatively, delete the existing collection and "
                "recreate it with your current embedding dimensions "
                "(note: this will result in the loss of all existing "
                "data)."
            )
            raise e
        super().__init__(
            embedding=embedding,
            model=model,
            table_storage=table_storage,
            data_storage=data_storage,
            query_storage=query_storage,
        )

    @staticmethod
    def _create_storage(
        vector_dim: int, collection_name: str, path: Optional[str]
    ) -> QdrantStorage:
        """Build one QdrantStorage; in-memory when *path* is falsy."""
        return QdrantStorage(
            vector_dim=vector_dim,
            collection_name=collection_name,
            path=path if path else ":memory:",
        )

    def clear(self) -> None:
        """Drop the contents of all three collections."""
        self.table_storage.clear()
        self.data_storage.clear()
        self.query_storage.clear()

    def _points_count(self, storage: QdrantStorage, collection_name: str) -> int:
        """Return the point count of a collection (0 when empty or unset)."""
        collection_info: CollectionInfo = storage.client.get_collection(collection_name)
        return collection_info.points_count or 0

    def get_table_collection_size(self) -> int:
        """Number of stored table (DDL) records."""
        return self._points_count(self.table_storage, "table_documents")

    def get_data_collection_size(self) -> int:
        """Number of stored sample-data (DML) records."""
        return self._points_count(self.data_storage, "data_documents")

    def get_query_collection_size(self) -> int:
        """Number of stored question/SQL records."""
        return self._points_count(self.query_storage, "query_documents")

    def get_query_collection_sample(self, n: int = 20) -> List[QueryRecord]:
        """Return up to *n* randomly sampled query records.

        Robustness fixes vs. the original: an empty collection returns []
        instead of calling ``scroll(limit=0)``, and points with a missing
        payload are skipped instead of raising.
        """
        total = self.get_query_collection_size()
        if total == 0:
            return []

        # scroll returns (points, next_page_offset); only the points matter.
        points, _ = self.query_storage.client.scroll(
            collection_name="query_documents",
            limit=total,
        )
        point_ids = [point.id for point in points]

        # Sample without replacement, capped at the collection size.
        random_ids = random.sample(point_ids, min(n, len(point_ids)))

        # Retrieve the sampled points and rebuild typed records.
        search_result = self.query_storage.client.retrieve("query_documents", ids=random_ids)
        return [QueryRecord(**record.payload) for record in search_result if record.payload]
107 |
--------------------------------------------------------------------------------
/camel_database_agent/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/camel_database_agent/py.typed
--------------------------------------------------------------------------------
/conftest.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | import pytest
4 | from dotenv import load_dotenv
5 |
6 |
@pytest.fixture(scope="session", autouse=True)
def load_env() -> None:
    """Session-wide autouse fixture: load variables from the repo-root .env."""
    env_file = os.path.join(os.path.dirname(__file__), ".env")
    load_dotenv(env_file)
10 |
--------------------------------------------------------------------------------
/database/mysql/1_ddl_sql.sql:
--------------------------------------------------------------------------------
1 | CREATE TABLE Buildings (
2 | BuildingCode VARCHAR(50) NOT NULL PRIMARY KEY COMMENT '建筑物代码',
3 | BuildingName TEXT COMMENT '建筑物名称',
4 | NumberOfFloors INT COMMENT '楼层数量',
5 | ElevatorAccess TINYINT(1) NOT NULL DEFAULT 0 COMMENT '电梯可用性',
6 | SiteParkingAvailable TINYINT(1) NOT NULL DEFAULT 0 COMMENT '场地停车可用性'
7 | ) COMMENT = '建筑物信息表';
8 |
9 | CREATE TABLE Categories (
10 | CategoryID VARCHAR(50) NOT NULL PRIMARY KEY COMMENT '类别ID',
11 | CategoryDescription TEXT COMMENT '类别描述',
12 | DepartmentID INT DEFAULT 0 COMMENT '部门ID'
13 | ) COMMENT = '类别信息表';
14 |
15 | CREATE TABLE Majors (
16 | MajorID INT NOT NULL PRIMARY KEY COMMENT '专业ID',
17 | Major TEXT COMMENT '专业名称'
18 | ) COMMENT = '专业信息表';
19 |
20 | CREATE TABLE Staff (
21 | StaffID INT NOT NULL PRIMARY KEY COMMENT '员工ID',
22 | StfFirstName TEXT COMMENT '员工名',
23 | StfLastname TEXT COMMENT '员工姓',
24 | StfStreetAddress TEXT COMMENT '员工街道地址',
25 | StfCity TEXT COMMENT '员工城市',
26 | StfState TEXT COMMENT '员工州',
27 | StfZipCode TEXT COMMENT '员工邮政编码',
28 | StfAreaCode TEXT COMMENT '员工区号',
29 | StfPhoneNumber TEXT COMMENT '员工电话号码',
30 | Salary FLOAT COMMENT '薪资',
31 | DateHired DATE COMMENT '雇佣日期',
32 | Position TEXT COMMENT '职位'
33 | ) COMMENT = '员工信息表';
34 |
35 | CREATE TABLE Student_Class_Status (
36 | ClassStatus INT NOT NULL PRIMARY KEY COMMENT '班级状态ID',
37 | ClassStatusDescription TEXT COMMENT '班级状态描述'
38 | ) COMMENT = '班级状态信息表';
39 |
40 | CREATE TABLE Class_Rooms (
41 | ClassRoomID INT NOT NULL PRIMARY KEY COMMENT '教室ID',
42 | BuildingCode VARCHAR(50) COMMENT '建筑物代码',
43 | PhoneAvailable TINYINT(1) NOT NULL DEFAULT 0 COMMENT '电话可用性'
44 | ) COMMENT = '教室信息表';
45 |
46 | CREATE TABLE Departments (
47 | DepartmentID INT NOT NULL PRIMARY KEY COMMENT '部门ID',
48 | DeptName TEXT COMMENT '部门名称',
49 | DeptChair INT DEFAULT 0 COMMENT '部门主任ID'
50 | ) COMMENT = '部门信息表';
51 |
52 | CREATE TABLE Faculty (
53 | StaffID INT NOT NULL PRIMARY KEY COMMENT '员工ID',
54 | Title TEXT COMMENT '职称',
55 | Status TEXT COMMENT '状态',
56 | Tenured TINYINT(1) NOT NULL DEFAULT 0 COMMENT '终身职教职'
57 | ) COMMENT = '教职员工信息表';
58 |
59 | CREATE TABLE Students (
60 | StudentID INT NOT NULL PRIMARY KEY COMMENT '学生ID',
61 | StudFirstName TEXT COMMENT '学生名',
62 | StudLastName TEXT COMMENT '学生姓',
63 | StudStreetAddress TEXT COMMENT '学生街道地址',
64 | StudCity TEXT COMMENT '学生城市',
65 | StudState TEXT COMMENT '学生州',
66 | StudZipCode TEXT COMMENT '学生邮政编码',
67 | StudAreaCode TEXT COMMENT '学生区号',
68 | StudPhoneNumber TEXT COMMENT '学生电话号码',
69 | StudGPA FLOAT DEFAULT 0 COMMENT '学生GPA',
70 | StudMajor INT COMMENT '学生专业ID'
71 | ) COMMENT = '学生信息表';
72 |
73 | CREATE TABLE Subjects (
74 | SubjectID INT NOT NULL PRIMARY KEY COMMENT '科目ID',
75 | CategoryID VARCHAR(50) COMMENT '类别ID',
76 | SubjectCode TEXT COMMENT '科目代码',
77 | SubjectName TEXT COMMENT '科目名称',
78 | SubjectPreReq TEXT DEFAULT NULL COMMENT '科目先修课程',
79 | SubjectDescription TEXT COMMENT '科目描述'
80 | ) COMMENT = '科目信息表';
81 |
82 | CREATE TABLE Classes (
83 | ClassID INT NOT NULL PRIMARY KEY COMMENT '课程ID',
84 | SubjectID INT DEFAULT 0 COMMENT '科目ID',
85 | ClassRoomID INT DEFAULT 0 COMMENT '教室ID',
86 | Credits INT DEFAULT 0 COMMENT '学分',
87 | StartDate DATE COMMENT '开始日期',
88 | StartTime TIME COMMENT '开始时间',
89 | Duration INT DEFAULT 0 COMMENT '持续时间',
90 | MondaySchedule TINYINT(1) NOT NULL DEFAULT 0 COMMENT '周一课程安排',
91 | TuesdaySchedule TINYINT(1) NOT NULL DEFAULT 0 COMMENT '周二课程安排',
92 | WednesdaySchedule TINYINT(1) NOT NULL DEFAULT 0 COMMENT '周三课程安排',
93 | ThursdaySchedule TINYINT(1) NOT NULL DEFAULT 0 COMMENT '周四课程安排',
94 | FridaySchedule TINYINT(1) NOT NULL DEFAULT 0 COMMENT '周五课程安排',
95 | SaturdaySchedule TINYINT(1) NOT NULL DEFAULT 0 COMMENT '周六课程安排'
96 | ) COMMENT = '课程信息表';
97 |
98 | CREATE TABLE Faculty_Categories (
99 | StaffID INT NOT NULL COMMENT '员工ID',
100 | CategoryID VARCHAR(50) NOT NULL COMMENT '类别ID',
101 | PRIMARY KEY (StaffID, CategoryID)
102 | ) COMMENT = '教职员工类别信息表';
103 |
104 | CREATE TABLE Faculty_Subjects ( -- many-to-many: subjects each staff member teaches, with a rating
105 | StaffID INT NOT NULL COMMENT '员工ID', -- staff member
106 | SubjectID INT NOT NULL COMMENT '科目ID', -- subject taught
107 | ProficiencyRating FLOAT DEFAULT 0 COMMENT '专业能力评分', -- proficiency score (seed data uses 8.0-10.0)
108 | PRIMARY KEY (StaffID, SubjectID) -- composite key: one rating per staff/subject pairing
109 | ) COMMENT = '教职员工科目信息表'; -- table comment: "faculty subject information table"
110 |
111 | CREATE TABLE Faculty_Classes ( -- many-to-many: class sections assigned to each staff member
112 | ClassID INT NOT NULL COMMENT '课程ID', -- class section
113 | StaffID INT NOT NULL COMMENT '员工ID', -- teaching staff member
114 | PRIMARY KEY (ClassID, StaffID) -- composite key: one row per section/teacher pairing
115 | ) COMMENT = '教职员工授课信息表'; -- table comment: "faculty teaching assignment table"
116 |
117 | CREATE TABLE Student_Schedules ( -- enrollment records linking students to class sections
118 | StudentID INT NOT NULL COMMENT '学生ID', -- enrolled student
119 | ClassID INT NOT NULL COMMENT '课程ID', -- class section
120 | ClassStatus INT DEFAULT 0 COMMENT '班级状态', -- status code; Student_Class_Status seed rows define 1=Enrolled, 2=Completed, 3=Withdrew
121 | Grade FLOAT DEFAULT 0 COMMENT '成绩', -- grade; seed data uses 0.0 for still-enrolled rows
122 | PRIMARY KEY (StudentID, ClassID) -- composite key: one row per student/section pairing
123 | ) COMMENT = '学生课程安排信息表'; -- table comment: "student schedule information table"
--------------------------------------------------------------------------------
/database/mysql/2_data_sql.sql:
--------------------------------------------------------------------------------
1 | INSERT INTO Buildings (BuildingCode, BuildingName, NumberOfFloors, ElevatorAccess, SiteParkingAvailable) VALUES ('AS', 'Arts and Sciences', 3, 1, 1);
2 | INSERT INTO Buildings (BuildingCode, BuildingName, NumberOfFloors, ElevatorAccess, SiteParkingAvailable) VALUES ('CC', 'College Center', 3, 1, 0);
3 | INSERT INTO Buildings (BuildingCode, BuildingName, NumberOfFloors, ElevatorAccess, SiteParkingAvailable) VALUES ('GYM', 'PE and Wellness', 1, 0, 1);
4 | INSERT INTO Buildings (BuildingCode, BuildingName, NumberOfFloors, ElevatorAccess, SiteParkingAvailable) VALUES ('IB', 'Instructional Building', 3, 1, 1);
5 | INSERT INTO Buildings (BuildingCode, BuildingName, NumberOfFloors, ElevatorAccess, SiteParkingAvailable) VALUES ('LB', 'Library', 2, 1, 1);
6 | INSERT INTO Buildings (BuildingCode, BuildingName, NumberOfFloors, ElevatorAccess, SiteParkingAvailable) VALUES ('TB', 'Technology Building', 2, 1, 1);
7 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('ACC', 'Accounting', 1);
8 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('ART', 'Art', 3);
9 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('BIO', 'Biology', 2);
10 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('BUS', 'Business', 1);
11 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('CHE', 'Chemistry', 2);
12 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('CIS', 'Computer Information Systems', 5);
13 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('CSC', 'Computer Science', 5);
14 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('ECO', 'Economics', 4);
15 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('ENG', 'English', 3);
16 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('FRE', 'French', 3);
17 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('GEG', 'Geography', 4);
18 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('GER', 'German', 3);
19 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('HIS', 'History', 4);
20 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('JRN', 'Journalism', 1);
21 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('MAT', 'Math', 2);
22 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('MUS', 'Music', 3);
23 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('PHY', 'Physics', 2);
24 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('POL', 'Political Science', 4);
25 | INSERT INTO Categories (CategoryID, CategoryDescription, DepartmentID) VALUES ('PSY', 'Psychology', 4);
26 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1131, 'LB', 1);
27 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1142, 'LB', 0);
28 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1231, 'LB', 1);
29 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1514, 'AS', 1);
30 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1515, 'AS', 1);
31 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1519, 'AS', 0);
32 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1525, 'AS', 1);
33 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1530, 'AS', 1);
34 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1532, 'AS', 0);
35 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1619, 'AS', 0);
36 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1622, 'AS', 1);
37 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1624, 'AS', 0);
38 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1627, 'AS', 0);
39 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1639, 'TB', 1);
40 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1640, 'TB', 0);
41 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1641, 'TB', 0);
42 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (1642, 'TB', 0);
43 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (2357, 'CC', 1);
44 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (2408, 'IB', 0);
45 | INSERT INTO Class_Rooms (ClassRoomID, BuildingCode, PhoneAvailable) VALUES (2423, 'IB', 0);
46 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1000, 11, 1231, 5, '2017-09-12', '10:00:00', 50, 0, 1, 1, 1, 1, 1);
47 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1002, 12, 1619, 4, '2017-09-11', '15:30:00', 110, 1, 0, 1, 0, 0, 0);
48 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1004, 13, 1627, 4, '2017-09-11', '08:00:00', 50, 1, 0, 1, 1, 1, 0);
49 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1006, 13, 1627, 4, '2017-09-11', '09:00:00', 110, 1, 0, 1, 0, 0, 0);
50 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1012, 14, 1627, 4, '2017-09-12', '13:00:00', 110, 0, 1, 0, 1, 0, 0);
51 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1020, 15, 3404, 4, '2017-09-12', '13:00:00', 110, 0, 1, 0, 1, 0, 0);
52 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1030, 16, 1231, 5, '2017-09-11', '11:00:00', 50, 1, 1, 1, 1, 1, 0);
53 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1031, 16, 1231, 5, '2017-09-11', '14:00:00', 50, 1, 1, 1, 1, 1, 0);
54 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1156, 37, 3443, 5, '2017-09-11', '16:00:00', 50, 1, 1, 1, 1, 1, 0);
55 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1162, 37, 3443, 5, '2017-09-11', '09:00:00', 140, 1, 0, 1, 0, 0, 0);
56 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1168, 37, 3445, 5, '2017-09-11', '11:00:00', 50, 1, 1, 1, 1, 1, 0);
57 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1180, 38, 3446, 5, '2017-09-11', '11:30:00', 140, 1, 0, 1, 0, 0, 0);
58 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1183, 38, 3415, 5, '2017-09-11', '13:00:00', 50, 1, 1, 1, 1, 1, 0);
59 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1184, 38, 3415, 5, '2017-09-11', '14:00:00', 50, 1, 1, 1, 1, 1, 0);
60 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1196, 39, 3415, 5, '2017-09-11', '15:00:00', 50, 1, 1, 1, 1, 1, 0);
61 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1500, 33, 1142, 5, '2017-09-11', '08:00:00', 50, 1, 1, 1, 1, 1, 0);
62 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1502, 34, 1142, 5, '2017-09-11', '09:00:00', 50, 1, 1, 1, 1, 1, 0);
63 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1560, 35, 2408, 3, '2017-09-12', '10:00:00', 50, 0, 1, 0, 1, 0, 1);
64 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1562, 36, 2408, 5, '2017-09-11', '12:00:00', 140, 1, 0, 0, 0, 1, 0);
65 | INSERT INTO Classes (ClassID, SubjectID, ClassRoomID, Credits, StartDate, StartTime, Duration, MondaySchedule, TuesdaySchedule, WednesdaySchedule, ThursdaySchedule, FridaySchedule, SaturdaySchedule) VALUES (1642, 29, 1514, 2, '2017-09-12', '11:00:00', 50, 0, 1, 0, 1, 0, 0);
66 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectDescription) VALUES (1, 'ACC', 'ACC 210', 'Financial Accounting Fundamentals I', 'Introduces basic accounting concepts, principles and prodcedures for recording business transactions and developing financial accounting reports. Excel spreadsheet component.');
67 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectPreReq, SubjectDescription) VALUES (2, 'ACC', 'ACC 220', 'Financial Accounting Fundamentals II', 'ACC 210', 'Applications of basic accounting concepts, principles and procedures to more complex business situations and to different forms of enterprise ownership. Includes computerized element. Prereq: ACC 210 or instructor permission.');
68 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectPreReq, SubjectDescription) VALUES (3, 'ACC', 'ACC 230', 'Fundamentals of Managerial Accounting', 'ACC 220', 'Analysis of accounting data as part of the managerial process of planning, decision making and control. Concentrates on economic decision making in enterprises. Includes computerized element. Prereq: ACC 220 or instructor permission.');
69 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectPreReq, SubjectDescription) VALUES (4, 'ACC', 'ACC 251', 'Intermediate Accounting', 'ACC 220', 'In-depth review of financial accounting principles. Emphasizes the conceptual framework of accounting, revenue and expense recognition. Accounts Receivable, Depreciation, and Amortization, etc. Prereq: ACC 220 or instructor permission.');
70 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectPreReq, SubjectDescription) VALUES (5, 'ACC', 'ACC 257', 'Business Tax Accounting', 'ACC 220', 'Basic principles, practices and governmental regulations (Federal, Washington, State, and local) involved in business tax accounting including filing returns, record keeping, tax planning, and registrations and business licenses. Prereq: ACC 220 or instructors permissions.');
71 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectDescription) VALUES (6, 'BUS', 'BUS 101', 'Introduction to Business', 'Survey of businss practices. Covers business terminology, forms of business ownership, franchising, small and international businesses, leadership and management, marketing principles, financing and investment methods, and business environment.');
72 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectDescription) VALUES (7, 'BUS', 'BUS 155', 'Developing A Feasibility Plan', 'With the aid of a counselor, a feasibility plan will be developed which will be the basis or start of your business plan. Must be concurrently enrolled in BUS 151.');
73 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectDescription) VALUES (8, 'BUS', 'BUS 151', 'Introduction to Entrepreneurship', 'Overview of the entrepreneurial process, examination of the marketplace, and discussion of successful business strategies. Product selection, selling and marketing strategies. Sources of information and assistance. Must be concurrently enrolled in BUS 155.');
74 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectDescription) VALUES (9, 'BUS', 'BUS 170', 'Information Technology I', 'Uses Word for Windows word processing skills, document formatting, keyboarding, and 10-key keypad skills. Emphasis on preparing letters, memos, reports, and tables. Introduces Excel spreadsheet basics.');
75 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectPreReq, SubjectDescription) VALUES (10, 'BUS', 'BUS 171', 'Information Technology II', 'BUS 170', 'Uses intermediate Word features including formatting and production, mail merge, macros, text columns, graphics, and fonts; Excel spreadsheet; and introduction to PowerPoint presentation software, Internet and email. Prereq: BUS 170 or permission from instructor.');
76 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectDescription) VALUES (11, 'ART', 'ART 100', 'Introduction to Art', 'Historical backgrounds and design fundamentals which have affected art. Includes slide lectures, reading and practical studio applications.');
77 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectDescription) VALUES (12, 'ART', 'ART 101', 'Design', 'Studio sudies in the fundamentals of two-dimensional art with problems based on line, space, texture, shape and color theories. Includes practical applications of these theories to design.');
78 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectDescription) VALUES (13, 'ART', 'ART 111', 'Drawing', 'Study of line, value, space, perspective, and compostion through the use o charcoal, pencil, pen, and brush.');
79 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectDescription) VALUES (14, 'ART', 'ART 201', 'Painting', 'Beginning painting in oil or synthetic media using still life. Emphasis on basics such as composition, value studies, color mixing, canvas preparation, and various styles and techniques. No prerequisite; some drawing background important.');
80 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectDescription) VALUES (15, 'ART', 'ART 210', 'Computer Art', 'Explore the elements of art such as line, value, space, composition, and color through the use of the computer. Sudents will create works of art using the computer.');
81 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectDescription) VALUES (16, 'ART', 'ART 251', 'Art History', 'Surveys major forms of visual expression from the Paleolithic, Egyptian, Mesopotamian, Greek, Roman, and Early Christian periods. Includes painting, sculpture, architecture, lectures, slides, and readings.');
82 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectDescription) VALUES (17, 'BIO', 'BIO 100', 'Biological Principles', 'An introductory biology course with lab for the non-science major. May include maintenance of the balance between man and his environment, nutrition, genetics and inheritence, ecological principles, plant and animal diversity, and evolution.');
83 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectDescription) VALUES (18, 'BIO', 'BIO 101', 'General Biology', 'Basic bilogical concepts with emphasis on general cell processes, plant and animal diversity, morphyology, limited reproduction, phylogeny of the living organisms, exploration of molecular genetics.');
84 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectDescription) VALUES (19, 'BIO', 'BIO 280', 'Microbiology', 'Introduction to micro-organisms including microbial cell structure and function; metabolism; microbial genetics; and the role of micro-organisms in disease, immunity, and other selected applied areas.');
85 | INSERT INTO Subjects (SubjectID, CategoryID, SubjectCode, SubjectName, SubjectDescription) VALUES (20, 'CHE', 'CHE 101', 'Chemistry', 'General chemistry for non-science majors. Completion of CHE 101 fulfills chemistry requirements for many health science majors.');
86 | INSERT INTO Departments (DepartmentID, DeptName, DeptChair) VALUES (1, 'Business Administration', 98012);
87 | INSERT INTO Departments (DepartmentID, DeptName, DeptChair) VALUES (2, 'Sciences', 98010);
88 | INSERT INTO Departments (DepartmentID, DeptName, DeptChair) VALUES (3, 'Humanities', 98005);
89 | INSERT INTO Departments (DepartmentID, DeptName, DeptChair) VALUES (4, 'Social Sciences', 98059);
90 | INSERT INTO Departments (DepartmentID, DeptName, DeptChair) VALUES (5, 'Information Technology', 98007);
91 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98005, 'Suzanne', 'Viescas', '15127 NE 24th, #383', 'Redmond', 'WA', '98052', '425', '555-2686', 44000.0, '1986-05-31', 'Faculty');
92 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98007, 'Gary', 'Hallmark', 'Route 2, Box 203B', 'Auburn', 'WA', '98002', '253', '555-2676', 53000.0, '1985-01-21', 'Faculty');
93 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98010, 'Jeffrey', 'Smith', '30301 - 166th Ave. N.E.', 'Fremont', 'CA', '94538', '510', '555-2596', 52000.0, '1983-10-06', 'Faculty');
94 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98011, 'Ann', 'Patterson', '16 Maple Lane', 'Marysville', 'WA', '98271', '253', '555-2591', 45000.0, '1983-10-16', 'Faculty');
95 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98012, 'Robert', 'Brown', '672 Lamont Ave', 'Houston', 'TX', '77201', '713', '555-2491', 49000.0, '1989-02-09', 'Faculty');
96 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98013, 'Deb', 'Waldal', '908 W. Capital Way', 'Tacoma', 'WA', '98413', '253', '555-2496', 44000.0, '1986-07-05', 'Faculty');
97 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98014, 'Peter', 'Brehm', '722 Moss Bay Blvd.', 'Kirkland', 'WA', '98033', '425', '555-2501', 60000.0, '1986-07-16', 'Faculty');
98 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98019, 'Mariya', 'Sergienko', '901 Pine Avenue', 'Portland', 'OR', '97208', '503', '555-2526', 45000.0, '1989-11-02', 'Faculty');
99 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98020, 'Jim', 'Glynn', '13920 S.E. 40th Street', 'Bellevue', 'WA', '98009', '425', '555-2531', 45000.0, '1985-08-02', 'Faculty');
100 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98021, 'Tim', 'Smith', '30301 - 166th Ave. N.E.', 'Seattle', 'WA', '98106', '206', '555-2536', 40000.0, '1988-12-17', 'Registrar');
101 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98025, 'Carol', 'Viescas', '722 Moss Bay Blvd.', 'Kirkland', 'WA', '98033', '425', '555-2576', 50000.0, '1984-04-12', 'Faculty');
102 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98028, 'Alaina', 'Hallmark', 'Route 2, Box 203B', 'Marysville', 'WA', '98270', '425', '555-2631', 57000.0, '1984-01-17', 'Faculty');
103 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98030, 'Liz', 'Keyser', '13920 S.E. 40th Street', 'Bellevue', 'WA', '98006', '425', '555-2556', 48000.0, '1988-05-31', 'Faculty');
104 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98036, 'Sam', 'Abolrous', '611 Alpine Drive', 'Palm Springs', 'CA', '92263', '760', '555-2611', 60000.0, '1982-11-20', 'Faculty');
105 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98040, 'Jim', 'Wilson', '101 NE 88th', 'Salem', 'OR', '97301', '503', '555-2636', 50000.0, '1987-01-13', 'Faculty');
106 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98042, 'David', 'Smith', '311 20th Ave. N.E.', 'Fremont', 'CA', '94538', '510', '555-2646', 52000.0, '1991-12-17', 'Faculty');
107 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98043, 'Kathryn', 'Patterson', '16 Maple Lane', 'Seattle', 'WA', '98115', '206', '555-2651', 25000.0, '1984-11-14', 'Secretary');
108 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98045, 'Michael', 'Hernandez', 'PO Box 223311', 'Tacoma', 'WA', '98413', '253', '555-2711', 60000.0, '1990-08-20', 'Faculty');
109 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98048, 'Joyce', 'Bonnicksen', '2424 Thames Drive', 'Bellevue', 'WA', '98006', '425', '555-2726', 60000.0, '1986-03-02', 'Faculty');
110 | INSERT INTO Staff (StaffID, StfFirstName, StfLastname, StfStreetAddress, StfCity, StfState, StfZipCode, StfAreaCode, StfPhoneNumber, Salary, DateHired, Position) VALUES (98052, 'Katherine', 'Ehrlich', '777 Fenexet Blvd', 'Redmond', 'WA', '98052', '425', '555-0399', 45000.0, '1985-03-08', 'Faculty');
111 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98005, 'Professor', 'Full Time', 1);
112 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98007, 'Professor', 'Full Time', 1);
113 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98010, 'Professor', 'On Leave', 1);
114 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98011, 'Instructor', 'Full Time', 1);
115 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98012, 'Professor', 'Full Time', 1);
116 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98013, 'Instructor', 'Full Time', 1);
117 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98014, 'Associate Professor', 'Full Time', 1);
118 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98019, 'Instructor', 'Full Time', 1);
119 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98020, 'Instructor', 'Full Time', 1);
120 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98025, 'Associate Professor', 'Full Time', 1);
121 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98028, 'Professor', 'Full Time', 1);
122 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98030, 'Instructor', 'Full Time', 1);
123 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98036, 'Professor', 'Full Time', 1);
124 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98040, 'Associate Professor', 'Full Time', 1);
125 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98042, 'Associate Professor', 'Full Time', 1);
126 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98045, 'Professor', 'Full Time', 1);
127 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98048, 'Professor', 'Full Time', 1);
128 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98052, 'Instructor', 'Part Time', 0);
129 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98053, 'Instructor', 'Full Time', 1);
130 | INSERT INTO Faculty (StaffID, Title, Status, Tenured) VALUES (98055, 'Professor', 'Full Time', 1);
131 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98005, 'ART');
132 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98005, 'ENG');
133 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98005, 'MUS');
134 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98007, 'ACC');
135 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98007, 'ART');
136 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98007, 'CIS');
137 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98007, 'MAT');
138 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98010, 'ACC');
139 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98010, 'CIS');
140 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98010, 'MAT');
141 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98011, 'ENG');
142 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98011, 'HIS');
143 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98011, 'MAT');
144 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98012, 'ACC');
145 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98012, 'ECO');
146 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98012, 'MAT');
147 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98013, 'CIS');
148 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98013, 'CSC');
149 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98013, 'MAT');
150 | INSERT INTO Faculty_Categories (StaffID, CategoryID) VALUES (98014, 'ART');
151 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1000, 98014);
152 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1002, 98036);
153 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1004, 98019);
154 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1006, 98045);
155 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1012, 98030);
156 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1020, 98028);
157 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1030, 98036);
158 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1031, 98005);
159 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1156, 98055);
160 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1162, 98064);
161 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1168, 98055);
162 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1180, 98011);
163 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1183, 98005);
164 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1184, 98011);
165 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1196, 98028);
166 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1500, 98028);
167 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1502, 98036);
168 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1560, 98028);
169 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (1562, 98036);
170 | INSERT INTO Faculty_Classes (ClassID, StaffID) VALUES (2001, 98020);
171 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98005, 12, 10.0);
172 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98005, 16, 10.0);
173 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98005, 34, 9.0);
174 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98005, 36, 8.0);
175 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98005, 38, 8.0);
176 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98007, 2, 9.0);
177 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98007, 5, 10.0);
178 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98007, 14, 8.0);
179 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98007, 24, 10.0);
180 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98007, 46, 10.0);
181 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98010, 2, 8.0);
182 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98010, 5, 10.0);
183 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98010, 24, 8.0);
184 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98010, 48, 10.0);
185 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98011, 38, 9.0);
186 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98011, 43, 8.0);
187 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98011, 49, 10.0);
188 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98012, 1, 9.0);
189 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98012, 4, 10.0);
190 | INSERT INTO Faculty_Subjects (StaffID, SubjectID, ProficiencyRating) VALUES (98012, 32, 9.0);
191 | INSERT INTO Majors (MajorID, Major) VALUES (1, 'General Studies');
192 | INSERT INTO Majors (MajorID, Major) VALUES (2, 'English');
193 | INSERT INTO Majors (MajorID, Major) VALUES (3, 'Music');
194 | INSERT INTO Majors (MajorID, Major) VALUES (4, 'Information Sciences');
195 | INSERT INTO Majors (MajorID, Major) VALUES (5, 'Accounting');
196 | INSERT INTO Majors (MajorID, Major) VALUES (6, 'Art');
197 | INSERT INTO Majors (MajorID, Major) VALUES (7, 'Mathematics');
198 | INSERT INTO Student_Class_Status (ClassStatus, ClassStatusDescription) VALUES (1, 'Enrolled');
199 | INSERT INTO Student_Class_Status (ClassStatus, ClassStatusDescription) VALUES (2, 'Completed');
200 | INSERT INTO Student_Class_Status (ClassStatus, ClassStatusDescription) VALUES (3, 'Withdrew');
201 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1001, 1000, 2, 99.83);
202 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1001, 1168, 2, 70.0);
203 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1001, 2907, 2, 67.33);
204 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1001, 3085, 2, 87.14);
205 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1001, 4180, 1, 0.0);
206 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1001, 5917, 1, 0.0);
207 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1001, 6082, 1, 0.0);
208 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1002, 1156, 2, 86.33);
209 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1002, 1500, 2, 85.72);
210 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1002, 2223, 3, 0.0);
211 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1002, 2889, 2, 68.22);
212 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1003, 1156, 2, 71.09);
213 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1003, 1500, 2, 89.05);
214 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1003, 1502, 2, 75.71);
215 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1003, 2911, 2, 85.39);
216 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1003, 4180, 1, 0.0);
217 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1003, 4560, 1, 0.0);
218 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1003, 6082, 1, 0.0);
219 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1004, 1162, 2, 91.44);
220 | INSERT INTO Student_Schedules (StudentID, ClassID, ClassStatus, Grade) VALUES (1004, 2410, 2, 90.56);
221 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1001, 'Kerry', 'Patterson', '9877 Hacienda Drive', 'San Antonio', 'TX', '78284', '210', '555-2706', 74.465, 1);
222 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1002, 'David', 'Hamilton', '908 W. Capital Way', 'Tacoma', 'WA', '98413', '253', '555-2701', 78.755, 2);
223 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1003, 'Betsy', 'Stadick', '611 Alpine Drive', 'Palm Springs', 'CA', '92263', '760', '555-2696', 85.235, 3);
224 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1004, 'Janice', 'Galvin', '4110 Old Redmond Rd.', 'Redmond', 'WA', '98052', '425', '555-2691', 81.0, 4);
225 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1005, 'Doris', 'Hartwig', '4726 - 11th Ave. N.E.', 'Seattle', 'WA', '98105', '206', '555-2671', 72.225, 5);
226 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1006, 'Scott', 'Bishop', '66 Spring Valley Drive', 'Medford', 'OR', '97501', '541', '555-2666', 88.5, 6);
227 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1007, 'Elizabeth', 'Hallmark', 'Route 2, Box 203B', 'Marysville', 'WA', '98271', '253', '555-2521', 87.65, 7);
228 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1008, 'Sara', 'Sheskey', '16679 NE 41st Court', 'Portland', 'OR', '97208', '503', '555-2566', 84.625, 1);
229 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1009, 'Karen', 'Smith', '30301 - 166th Ave. N.E.', 'Eugene', 'OR', '97401', '541', '555-2551', 80.0, 2);
230 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1010, 'Marianne', 'Wier', '908 W. Capital Way', 'Tacoma', 'WA', '98413', '253', '555-2606', 83.55, 3);
231 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1011, 'John', 'Kennedy', '16679 NE 41st Court', 'Portland', 'OR', '97208', '503', '555-2621', 77.65, 4);
232 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1012, 'Sarah', 'Thompson', '2222 Springer Road', 'Lubbock', 'TX', '79402', '806', '555-2626', 89.5, 5);
233 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1013, 'Michael', 'Viescas', '15127 NE 24th, #383', 'Redmond', 'WA', '98052', '425', '555-2656', 80.25, 6);
234 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1014, 'Kendra', 'Bonnicksen', '12330 Larchlemont Lane', 'Seattle', 'WA', '98105', '206', '555-2716', 85.55, 7);
235 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1015, 'Brannon', 'Jones', '777 Fenexet Blvd', 'Long Beach', 'CA', '90809', '562', '555-0399', 86.0, 2);
236 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1016, 'Steve', 'Pundt', '2500 Rosales Lane', 'Dallas', 'TX', '75204', '972', '555-9938', 77.125, 4);
237 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1017, 'George', 'Chavez', '281 Old Navy Road', 'Marysville', 'WA', '98270', '206', '555-9930', 79.25, 6);
238 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA, StudMajor) VALUES (1018, 'Richard', 'Lum', '754 Fourth Ave', 'Seattle', 'WA', '98115', '206', '555-2296', 72.55, 7);
239 | INSERT INTO Students (StudentID, StudFirstName, StudLastName, StudStreetAddress, StudCity, StudState, StudZipCode, StudAreaCode, StudPhoneNumber, StudGPA) VALUES (1019, 'Daffy', 'Dumbwit', '4567 NE 32nd Ct', 'Bellevue', 'WA', '98002', '425', '555-9872', 0.0);
--------------------------------------------------------------------------------
/database/postgresql/1_ddl_sql.sql:
--------------------------------------------------------------------------------
-- PostgreSQL DDL for the school-scheduling sample database.
-- The COMMENT ON statements store Chinese descriptions in the database
-- catalog; those strings are runtime data and are left untouched.
-- English summaries are given as -- comments above each table.
-- NOTE(review): no FOREIGN KEY constraints are declared; relationships
-- (e.g. Students.StudMajor -> Majors.MajorID) appear to be implicit —
-- presumably to match the SQLite/MySQL variants of this schema; confirm.

-- Buildings: campus buildings (code, name, floors, elevator/parking flags).
CREATE TABLE Buildings (
    BuildingCode TEXT NOT NULL PRIMARY KEY,
    BuildingName TEXT,
    NumberOfFloors INTEGER,
    ElevatorAccess BOOLEAN NOT NULL DEFAULT FALSE,
    SiteParkingAvailable BOOLEAN NOT NULL DEFAULT FALSE
);
COMMENT ON TABLE Buildings IS '建筑物信息表';
COMMENT ON COLUMN Buildings.BuildingCode IS '建筑物代码';
COMMENT ON COLUMN Buildings.BuildingName IS '建筑物名称';
COMMENT ON COLUMN Buildings.NumberOfFloors IS '楼层数量';
COMMENT ON COLUMN Buildings.ElevatorAccess IS '电梯可用性';
COMMENT ON COLUMN Buildings.SiteParkingAvailable IS '场地停车可用性';

-- Categories: subject categories, each linked to a department by id.
CREATE TABLE Categories (
    CategoryID TEXT NOT NULL PRIMARY KEY,
    CategoryDescription TEXT,
    DepartmentID INTEGER DEFAULT 0
);
COMMENT ON TABLE Categories IS '类别信息表';
COMMENT ON COLUMN Categories.CategoryID IS '类别ID';
COMMENT ON COLUMN Categories.CategoryDescription IS '类别描述';
COMMENT ON COLUMN Categories.DepartmentID IS '部门ID';

-- Majors: lookup table of student majors.
CREATE TABLE Majors (
    MajorID INTEGER NOT NULL PRIMARY KEY,
    Major TEXT
);
COMMENT ON TABLE Majors IS '专业信息表';
COMMENT ON COLUMN Majors.MajorID IS '专业ID';
COMMENT ON COLUMN Majors.Major IS '专业名称';

-- Staff: employee records (name, address, phone, salary, hire date, position).
CREATE TABLE Staff (
    StaffID INTEGER NOT NULL PRIMARY KEY,
    StfFirstName TEXT,
    StfLastname TEXT,
    StfStreetAddress TEXT,
    StfCity TEXT,
    StfState TEXT,
    StfZipCode TEXT,
    StfAreaCode TEXT,
    StfPhoneNumber TEXT,
    Salary REAL,
    DateHired DATE,
    Position TEXT
);
COMMENT ON TABLE Staff IS '员工信息表';
COMMENT ON COLUMN Staff.StaffID IS '员工ID';
COMMENT ON COLUMN Staff.StfFirstName IS '员工名';
COMMENT ON COLUMN Staff.StfLastname IS '员工姓';
COMMENT ON COLUMN Staff.StfStreetAddress IS '员工街道地址';
COMMENT ON COLUMN Staff.StfCity IS '员工城市';
COMMENT ON COLUMN Staff.StfState IS '员工州';
COMMENT ON COLUMN Staff.StfZipCode IS '员工邮政编码';
COMMENT ON COLUMN Staff.StfAreaCode IS '员工区号';
COMMENT ON COLUMN Staff.StfPhoneNumber IS '员工电话号码';
COMMENT ON COLUMN Staff.Salary IS '薪资';
COMMENT ON COLUMN Staff.DateHired IS '雇佣日期';
COMMENT ON COLUMN Staff.Position IS '职位';

-- Student_Class_Status: lookup of enrollment statuses (e.g. enrolled/completed/withdrew).
CREATE TABLE Student_Class_Status (
    ClassStatus INTEGER NOT NULL PRIMARY KEY,
    ClassStatusDescription TEXT
);
COMMENT ON TABLE Student_Class_Status IS '班级状态信息表';
COMMENT ON COLUMN Student_Class_Status.ClassStatus IS '班级状态ID';
COMMENT ON COLUMN Student_Class_Status.ClassStatusDescription IS '班级状态描述';

-- Class_Rooms: classrooms, located in a building, with a phone-availability flag.
CREATE TABLE Class_Rooms (
    ClassRoomID INTEGER NOT NULL PRIMARY KEY,
    BuildingCode TEXT,
    PhoneAvailable BOOLEAN NOT NULL DEFAULT FALSE
);
COMMENT ON TABLE Class_Rooms IS '教室信息表';
COMMENT ON COLUMN Class_Rooms.ClassRoomID IS '教室ID';
COMMENT ON COLUMN Class_Rooms.BuildingCode IS '建筑物代码';
COMMENT ON COLUMN Class_Rooms.PhoneAvailable IS '电话可用性';

-- Departments: academic departments with their chairperson's staff id.
CREATE TABLE Departments (
    DepartmentID INTEGER NOT NULL PRIMARY KEY,
    DeptName TEXT,
    DeptChair INTEGER DEFAULT 0
);
COMMENT ON TABLE Departments IS '部门信息表';
COMMENT ON COLUMN Departments.DepartmentID IS '部门ID';
COMMENT ON COLUMN Departments.DeptName IS '部门名称';
COMMENT ON COLUMN Departments.DeptChair IS '部门主任ID';

-- Faculty: teaching-staff attributes keyed by staff id (title, status, tenure).
CREATE TABLE Faculty (
    StaffID INTEGER NOT NULL PRIMARY KEY,
    Title TEXT,
    Status TEXT,
    Tenured BOOLEAN NOT NULL DEFAULT FALSE
);
COMMENT ON TABLE Faculty IS '教职员工信息表';
COMMENT ON COLUMN Faculty.StaffID IS '员工ID';
COMMENT ON COLUMN Faculty.Title IS '职称';
COMMENT ON COLUMN Faculty.Status IS '状态';
COMMENT ON COLUMN Faculty.Tenured IS '终身职教职';

-- Students: student records (name, address, phone, GPA, major id).
CREATE TABLE Students (
    StudentID INTEGER NOT NULL PRIMARY KEY,
    StudFirstName TEXT,
    StudLastName TEXT,
    StudStreetAddress TEXT,
    StudCity TEXT,
    StudState TEXT,
    StudZipCode TEXT,
    StudAreaCode TEXT,
    StudPhoneNumber TEXT,
    StudGPA REAL DEFAULT 0,
    StudMajor INTEGER
);
COMMENT ON TABLE Students IS '学生信息表';
COMMENT ON COLUMN Students.StudentID IS '学生ID';
COMMENT ON COLUMN Students.StudFirstName IS '学生名';
COMMENT ON COLUMN Students.StudLastName IS '学生姓';
COMMENT ON COLUMN Students.StudStreetAddress IS '学生街道地址';
COMMENT ON COLUMN Students.StudCity IS '学生城市';
COMMENT ON COLUMN Students.StudState IS '学生州';
COMMENT ON COLUMN Students.StudZipCode IS '学生邮政编码';
COMMENT ON COLUMN Students.StudAreaCode IS '学生区号';
COMMENT ON COLUMN Students.StudPhoneNumber IS '学生电话号码';
COMMENT ON COLUMN Students.StudGPA IS '学生GPA';
COMMENT ON COLUMN Students.StudMajor IS '学生专业ID';

-- Subjects: course subjects, linked to a category, with optional prerequisite.
CREATE TABLE Subjects (
    SubjectID INTEGER NOT NULL PRIMARY KEY,
    CategoryID TEXT,
    SubjectCode TEXT,
    SubjectName TEXT,
    SubjectPreReq TEXT DEFAULT NULL,
    SubjectDescription TEXT
);
COMMENT ON TABLE Subjects IS '科目信息表';
COMMENT ON COLUMN Subjects.SubjectID IS '科目ID';
COMMENT ON COLUMN Subjects.CategoryID IS '类别ID';
COMMENT ON COLUMN Subjects.SubjectCode IS '科目代码';
COMMENT ON COLUMN Subjects.SubjectName IS '科目名称';
COMMENT ON COLUMN Subjects.SubjectPreReq IS '科目先修课程';
COMMENT ON COLUMN Subjects.SubjectDescription IS '科目描述';

-- Classes: scheduled class sections of a subject in a room, with start
-- date/time, duration, credits, and one boolean flag per weekday.
CREATE TABLE Classes (
    ClassID INTEGER NOT NULL PRIMARY KEY,
    SubjectID INTEGER DEFAULT 0,
    ClassRoomID INTEGER DEFAULT 0,
    Credits INTEGER DEFAULT 0,
    StartDate DATE,
    StartTime TIME,
    Duration INTEGER DEFAULT 0,
    MondaySchedule BOOLEAN NOT NULL DEFAULT FALSE,
    TuesdaySchedule BOOLEAN NOT NULL DEFAULT FALSE,
    WednesdaySchedule BOOLEAN NOT NULL DEFAULT FALSE,
    ThursdaySchedule BOOLEAN NOT NULL DEFAULT FALSE,
    FridaySchedule BOOLEAN NOT NULL DEFAULT FALSE,
    SaturdaySchedule BOOLEAN NOT NULL DEFAULT FALSE
);
COMMENT ON TABLE Classes IS '课程信息表';
COMMENT ON COLUMN Classes.ClassID IS '课程ID';
COMMENT ON COLUMN Classes.SubjectID IS '科目ID';
COMMENT ON COLUMN Classes.ClassRoomID IS '教室ID';
COMMENT ON COLUMN Classes.Credits IS '学分';
COMMENT ON COLUMN Classes.StartDate IS '开始日期';
COMMENT ON COLUMN Classes.StartTime IS '开始时间';
COMMENT ON COLUMN Classes.Duration IS '持续时间';
COMMENT ON COLUMN Classes.MondaySchedule IS '周一课程安排';
COMMENT ON COLUMN Classes.TuesdaySchedule IS '周二课程安排';
COMMENT ON COLUMN Classes.WednesdaySchedule IS '周三课程安排';
COMMENT ON COLUMN Classes.ThursdaySchedule IS '周四课程安排';
COMMENT ON COLUMN Classes.FridaySchedule IS '周五课程安排';
COMMENT ON COLUMN Classes.SaturdaySchedule IS '周六课程安排';

-- Faculty_Categories: many-to-many join between staff and categories.
CREATE TABLE Faculty_Categories (
    StaffID INTEGER NOT NULL,
    CategoryID TEXT NOT NULL,
    PRIMARY KEY (StaffID, CategoryID)
);
COMMENT ON TABLE Faculty_Categories IS '教职员工类别信息表';
COMMENT ON COLUMN Faculty_Categories.StaffID IS '员工ID';
COMMENT ON COLUMN Faculty_Categories.CategoryID IS '类别ID';

-- Faculty_Subjects: staff-to-subject join with a proficiency rating.
CREATE TABLE Faculty_Subjects (
    StaffID INTEGER NOT NULL,
    SubjectID INTEGER NOT NULL,
    ProficiencyRating REAL DEFAULT 0,
    PRIMARY KEY (StaffID, SubjectID)
);
COMMENT ON TABLE Faculty_Subjects IS '教职员工科目信息表';
COMMENT ON COLUMN Faculty_Subjects.StaffID IS '员工ID';
COMMENT ON COLUMN Faculty_Subjects.SubjectID IS '科目ID';
COMMENT ON COLUMN Faculty_Subjects.ProficiencyRating IS '专业能力评分';

-- Faculty_Classes: staff-to-class join (who teaches which class section).
CREATE TABLE Faculty_Classes (
    ClassID INTEGER NOT NULL,
    StaffID INTEGER NOT NULL,
    PRIMARY KEY (ClassID, StaffID)
);
COMMENT ON TABLE Faculty_Classes IS '教职员工授课信息表';
COMMENT ON COLUMN Faculty_Classes.ClassID IS '课程ID';
COMMENT ON COLUMN Faculty_Classes.StaffID IS '员工ID';

-- Student_Schedules: student-to-class join with enrollment status and grade.
CREATE TABLE Student_Schedules (
    StudentID INTEGER NOT NULL,
    ClassID INTEGER NOT NULL,
    ClassStatus INTEGER DEFAULT 0,
    Grade REAL DEFAULT 0,
    PRIMARY KEY (StudentID, ClassID)
);
COMMENT ON TABLE Student_Schedules IS '学生课程安排信息表';
COMMENT ON COLUMN Student_Schedules.StudentID IS '学生ID';
COMMENT ON COLUMN Student_Schedules.ClassID IS '课程ID';
COMMENT ON COLUMN Student_Schedules.ClassStatus IS '班级状态';
COMMENT ON COLUMN Student_Schedules.Grade IS '成绩';
--------------------------------------------------------------------------------
/database/sqlite/music.sqlite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/database/sqlite/music.sqlite
--------------------------------------------------------------------------------
/database/sqlite/school_scheduling.sqlite:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/database/sqlite/school_scheduling.sqlite
--------------------------------------------------------------------------------
/docs/demo_video.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/docs/demo_video.png
--------------------------------------------------------------------------------
/docs/screenshot-music-database.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/docs/screenshot-music-database.png
--------------------------------------------------------------------------------
/docs/screenshot-question-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/docs/screenshot-question-1.png
--------------------------------------------------------------------------------
/docs/screenshot-question-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/docs/screenshot-question-2.png
--------------------------------------------------------------------------------
/docs/screenshot-question-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/docs/screenshot-question-3.png
--------------------------------------------------------------------------------
/docs/screenshot-question-4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/docs/screenshot-question-4.png
--------------------------------------------------------------------------------
/docs/screenshot-question-5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/docs/screenshot-question-5.png
--------------------------------------------------------------------------------
/docs/screenshot-question-6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/docs/screenshot-question-6.png
--------------------------------------------------------------------------------
/docs/screenshot-question-7.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/docs/screenshot-question-7.png
--------------------------------------------------------------------------------
/docs/screenshot-question-chinese.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/docs/screenshot-question-chinese.png
--------------------------------------------------------------------------------
/docs/screenshot-question-korean.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/docs/screenshot-question-korean.png
--------------------------------------------------------------------------------
/docs/screenshot-school-scheduling-database.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/docs/screenshot-school-scheduling-database.png
--------------------------------------------------------------------------------
/example.py:
--------------------------------------------------------------------------------
import logging
import os
import sys
import uuid

import pandas as pd
from camel.embeddings import OpenAIEmbedding
from camel.models import ModelFactory
from camel.types import ModelPlatformType, ModelType
from colorama import Fore
from tabulate import tabulate

from camel_database_agent import DatabaseAgent
from camel_database_agent.database.manager import DatabaseManager
from camel_database_agent.database_base import TrainLevel

# Configure logging settings to show errors on stdout
logging.basicConfig(
    level=logging.ERROR,
    format="%(message)s",
    handlers=[logging.StreamHandler(sys.stdout)],
    force=True,
)
# Set specific logging level for the application module
logging.getLogger("camel_database_agent").setLevel(logging.INFO)
logger = logging.getLogger(__name__)

# Configure pandas display options to show complete data
pd.set_option("display.max_rows", None)  # Show all rows
pd.set_option("display.max_columns", None)  # Show all columns
pd.set_option("display.width", None)  # Auto-detect display width
pd.set_option("display.max_colwidth", None)  # Show full content of each cell

# Define database connection string
database_url = "sqlite:///database/sqlite/music.sqlite"


def main() -> None:
    """Train a DatabaseAgent on the sample music database and run one query.

    The agent construction, knowledge training, and query all perform
    network/database I/O, so they live behind a ``__main__`` guard:
    importing this module no longer triggers those side effects, while
    running it as a script behaves exactly as before.
    """
    # Initialize the database agent with required components
    database_agent = DatabaseAgent(
        interactive_mode=True,
        database_manager=DatabaseManager(db_url=database_url),
        # Configure LLM model
        model=ModelFactory.create(
            model_platform=ModelPlatformType.OPENAI,
            model_type=ModelType.GPT_4O_MINI,
            api_key=os.getenv("OPENAI_API_KEY"),
            url=os.getenv("OPENAI_API_BASE_URL"),
        ),
        # Configure embedding model
        embedding_model=OpenAIEmbedding(
            api_key=os.getenv("OPENAI_API_KEY"),
            url=os.getenv("OPENAI_API_BASE_URL"),
        ),
    )

    # Train agent's knowledge about the database schema.
    # MEDIUM level balances training time and knowledge depth: it analyzes
    # schema relationships, extracts representative sample data, and
    # generates a moderate number of query examples.
    database_agent.train_knowledge(
        level=TrainLevel.MEDIUM,
        # reset_train=False reuses cached knowledge when available;
        # True forces regeneration of all database insights and examples.
        reset_train=False,
    )

    # Display database overview information
    print(f"{Fore.GREEN}Database Overview")
    print("=" * 50)
    print(f"{database_agent.get_summary()}\n\n{Fore.RESET}")

    # Display recommended example questions
    print(f"{Fore.GREEN}Recommendation Question")
    print("=" * 50)
    print(f"{database_agent.get_recommendation_question()}\n\n{Fore.RESET}")

    # Execute a sample query using natural language
    response = database_agent.ask(
        session_id=str(uuid.uuid4()), question="List all playlists with more than 5 tracks"
    )

    # Handle and display the query results
    if response.success:
        if response.dataset is not None:
            # Format successful results as a table
            data = tabulate(tabular_data=response.dataset, headers='keys', tablefmt='psql')
            print(f"{Fore.GREEN}{data}{Fore.RESET}")
        else:
            print(f"{Fore.GREEN}No results found.{Fore.RESET}")
        # Display the SQL that was generated
        print(f"{Fore.YELLOW}{response.sql}{Fore.RESET}")
    else:
        # Display error message if query failed
        print(f"{Fore.RED}+ {response.error}{Fore.RESET}")


if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["hatchling"]
3 | build-backend = "hatchling.build"
4 |
5 | [project]
6 | name = "camel-database-agent"
7 | version = "0.2.0"
8 | description = "An integration package connecting Database and Camel"
9 | authors = [{ name = "Lei Zhang" }]
10 | requires-python = ">=3.10,<3.13"
11 | readme = "README.md"
12 | license = "Apache-2.0"
13 | dependencies = [
14 | "camel-ai[all]>=0.2.40",
15 | "tabulate>=0.9",
16 | "colorama>=0.4",
17 | "dotenv>=0.9",
18 | "PyMySQL==1.1.1",
19 | "psycopg>=3.2"
20 | ]
21 |
22 | [dependency-groups]
23 | test = [
24 | "pytest>=7,<8",
25 | "mock>=5,<6",
26 | "pytest-asyncio>=0.23.0,<0.24",
27 | "testcontainers>=4.9"
28 | ]
29 | dev=[
30 | "ruff>=0.7,<0.8",
31 | "mypy>=1.5.1,<2",
32 | "pre-commit>=3,<4",
33 | "types-tabulate>=0.9",
34 | "pandas-stubs>=2.2",
35 | "uv==0.6.5",
36 | ]
37 |
38 | [project.urls]
39 | Repository = "https://github.com/coolbeevip/camel-database-agent"
40 |
41 | [tool.uv]
42 | default-groups = [
43 | "dev", "test"
44 | ]
45 |
46 | [tool.hatch.build.targets.sdist]
47 | include = ["camel_database_agent"]
48 |
49 | [tool.hatch.build.targets.wheel]
50 | include = ["camel_database_agent"]
51 |
52 | [tool.ruff]
53 | line-length = 99
54 | fix = true
55 | target-version = "py310"
56 |
57 | [tool.ruff.format]
58 | quote-style = "preserve"
59 |
60 | [tool.ruff.lint]
61 | extend-select = [
62 | "I", # isort
63 | "B", # flake8-bugbear
64 | "C4", # flake8-comprehensions
65 | "PGH", # pygrep-hooks
66 | "RUF", # ruff
67 | "E",
68 | ]
69 | ignore = [
70 | "B028", # Warning without stacklevel
71 | "B904", # use 'raise ... from err'
72 | "B905", # use explicit 'strict=' parameter with 'zip()'
73 | "N818", # Exception name should be named with an Error suffix
74 | "C416", # I think comprehension is more clear https://docs.astral.sh/ruff/rules/unnecessary-comprehension/
75 | "C408", # we have used lots of dict(...) instead of literal
76 | ]
77 |
78 | [tool.ruff.lint.pydocstyle]
79 | convention = "google"
80 |
81 | [tool.ruff.lint.isort]
82 | known-first-party = ["camel_database_agent"]
83 |
84 | [tool.ruff.lint.per-file-ignores]
85 | "camel_database_agent/database_prompt.py" = ["E501"]
86 | "camel_database_agent/datagen/prompts.py" = ["E501"]
87 | "camel_database_agent/database/prompts.py" = ["E501"]
88 |
89 | [tool.pytest.ini_options]
90 | pythonpath = ["."]
91 | addopts = ["--strict-markers"]
92 |
93 | [tool.coverage.report]
94 | include_namespace_packages = true
95 |
96 | [tool.mypy]
97 | exclude = [
98 | '\.venv/.*', # exclude .venv directory
99 | 'site-packages/.*', # exclude site-packages
100 | ]
101 |
102 | [[tool.mypy.overrides]]
103 | module = [
104 | "camel.*",
105 | "colorama",
106 | "pandas",
107 | "qdrant_client.*",
108 | "tabulate"
109 | ]
110 | ignore_missing_imports = true
--------------------------------------------------------------------------------
/scripts/check_imports.py:
--------------------------------------------------------------------------------
import importlib.util
import sys
import traceback


def load_file(path: str) -> None:
    """Import the Python source file at *path* as a throwaway module.

    Raises whatever exception the import produces. Uses the spec-based
    importlib.util API instead of the deprecated
    ``SourceFileLoader.load_module()``, which is on a removal path in
    recent Python versions.
    """
    spec = importlib.util.spec_from_file_location("_check_imports_target", path)
    if spec is None or spec.loader is None:
        raise ImportError(f"cannot create an import spec for {path!r}")
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)


def main(files: list[str]) -> int:
    """Try to import each file; print failures and return the exit status.

    Returns 1 if any file failed to import, 0 otherwise — same contract
    as the original script's sys.exit code.
    """
    has_failure = False
    for file in files:
        try:
            load_file(file)
        except Exception:
            has_failure = True
            print(file)
            traceback.print_exc()
            print()
    return 1 if has_failure else 0


if __name__ == "__main__":
    sys.exit(main(sys.argv[1:]))
18 |
--------------------------------------------------------------------------------
/scripts/lint_imports.sh:
--------------------------------------------------------------------------------
#!/bin/bash

set -eu

# Count of forbidden import patterns found in the tree
errors=0

# make sure not importing from langchain or langchain_experimental
# (a successful grep means matches exist, which is the failure case here)
if git --no-pager grep '^from langchain\.' . ; then
    errors=$((errors+1))
fi
if git --no-pager grep '^from langchain_experimental\.' . ; then
    errors=$((errors+1))
fi

# Fail the lint when any forbidden import was found
if [ "$errors" -eq 0 ]; then
    exit 0
fi
exit 1
--------------------------------------------------------------------------------
/spider2_lite/README.md:
--------------------------------------------------------------------------------
1 | Download local_sqlite.zip
2 |
3 | https://drive.usercontent.google.com/download?id=1coEVsCZq-Xvj9p2TnhBFoFTsY-UoYGmG&authuser=0
4 |
5 | Download spider2-lite.jsonl
6 |
7 | https://github.com/xlang-ai/Spider2/blob/main/spider2-lite/spider2-lite.jsonl
--------------------------------------------------------------------------------
/spider2_lite/database/README.md:
--------------------------------------------------------------------------------
1 | Download the database file local_sqlite.zip from the following link and extract it to the current directory.
2 |
3 | https://drive.usercontent.google.com/download?id=1coEVsCZq-Xvj9p2TnhBFoFTsY-UoYGmG&authuser=0
--------------------------------------------------------------------------------
/spider2_lite/spider2_run.py:
--------------------------------------------------------------------------------
import json
import logging
import os
import sys

from camel.models import ModelFactory
from camel.types import ModelPlatformType
from colorama import Fore
from dotenv import load_dotenv

from camel_database_agent import DatabaseAgent
from camel_database_agent.database_base import TrainLevel

# Load MODEL_NAME / OPENAI_* settings from a local .env file.
load_dotenv()

"""设置日志"""
# Logging setup: only FATAL messages globally, but keep this package's
# own logger at INFO so agent progress is still visible.
logging.basicConfig(
    level=logging.FATAL,
    format="%(message)s",
    handlers=[logging.StreamHandler(sys.stdout)],
    force=True,
)
logging.getLogger("camel_database_agent").setLevel(logging.INFO)
logger = logging.getLogger(__name__)

# LLM used by the agents; model name and endpoint come from the
# environment (required keys — raises KeyError if missing).
model = ModelFactory.create(
    model_platform=ModelPlatformType.OPENAI_COMPATIBLE_MODEL,
    model_type=os.environ["MODEL_NAME"],
    api_key=os.environ["OPENAI_API_KEY"],
    url=os.environ["OPENAI_API_BASE_URL"],
)

# Layout relative to this script: SQLite databases under
# database/local_sqlite, per-model outputs under workspace/<MODEL_NAME>.
current_dir = os.path.dirname(os.path.abspath(__file__))
spider2_sqlite_database = os.path.join(current_dir, "database", "local_sqlite")
spider2_workspace = os.path.join(current_dir, "workspace", os.environ["MODEL_NAME"])
if not os.path.exists(spider2_workspace):
    os.makedirs(spider2_workspace)
else:
    # Existing workspace is kept so cached training data is reused;
    # uncomment below to wipe it and start fresh.
    pass
    # shutil.rmtree(spider2_workspace)
    # os.makedirs(spider2_workspace)

# Load the Spider2-lite benchmark, keep only the local-SQLite subset,
# and sort by database id so one agent can be reused per database.
with open(os.path.join(current_dir, "spider2-lite.jsonl"), "r") as f:
    examples = [json.loads(line) for line in f]
    examples = [example for example in examples if example["instance_id"].startswith("local")]
    examples = sorted(examples, key=lambda x: x["db"])

# One DatabaseAgent per database id, created lazily; success/failed
# counters accumulate benchmark results.
database_agent_cache = {}
success = 0
failed = 0
total = len(examples)
52 | for example in examples:
53 | try:
54 | instance_id = example["instance_id"]
55 | db_id = example["db"]
56 | question = example["question"]
57 | db_path = os.path.join(spider2_sqlite_database, f"{db_id}.sqlite")
58 | if not os.path.exists(db_path):
59 | print(f"{Fore.RED}database {db_id} not found{Fore.RESET}")
60 | continue
61 | if db_id not in database_agent_cache:
62 | database_agent_cache[db_id] = database_agent = DatabaseAgent(
63 | db_url=f"sqlite:///{db_path}",
64 | model=model,
65 | data_path=os.path.join(str(spider2_workspace), db_id),
66 | )
67 | database_agent.train_knowledge(
68 | level=TrainLevel.LOW,
69 | reset_train=False,
70 | )
71 | database_agent = database_agent_cache[db_id]
72 | response = database_agent.ask(
73 | session_id=instance_id,
74 | question=question,
75 | )
76 |
77 | example["sql"] = response.sql
78 | if response.success:
79 | example["result"] = True
80 | success += 1
81 | else:
82 | example["result"] = False
83 | failed += 1
84 | except Exception as e:
85 | print(f"{Fore.RED}{e!s}{Fore.RESET}")
86 | finally:
87 | print(f"process {success + failed}/{total}")
88 |
89 | with open(os.path.join(spider2_workspace, "spider2-lite-result.jsonl"), "a") as f:
90 | f.write(json.dumps(example) + "\n")
91 |
92 | print(
93 | f"success: {success}, failed: {failed}, total: {total}",
94 | )
95 |
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/tests/__init__.py
--------------------------------------------------------------------------------
/tests/integration_tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/tests/integration_tests/__init__.py
--------------------------------------------------------------------------------
/tests/integration_tests/data.sql:
--------------------------------------------------------------------------------
-- Insert user data
INSERT INTO `user` (`username`, `password_hash`, `mobile`, `email`) VALUES
('张三', 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855', '13800138000', 'john@example.com'),
('李四', '5e884898da28047151d0e56f8dc6292773603d0d6aabbdd62a11ef721d1542d8', '13912345678', 'jane@test.com'),
('王五', '6cf615d5bcaac778352a8f1f3360d23f02f34ec182e259897fd6ce485d7870d4', '13698765432', NULL);

-- Insert product data
INSERT INTO `product` (`product_name`, `description`, `price`, `stock`) VALUES
('iPhone 15', '6.1英寸 128GB 黑色', 6999.00, 100),
('小米电视65寸', '4K超高清智能电视', 3299.00, 50),
('华为MateBook X', '13英寸轻薄笔记本', 8999.00, 30),
('耐克运动鞋', 'Air Max 270 男款', 899.00, 200),
('《深入浅出MySQL》', '数据库技术书籍', 99.90, 500);

-- Insert user addresses (one address per user)
INSERT INTO `user_address` (`user_id`, `receiver`, `phone`, `province`, `city`, `district`, `detail_address`) VALUES
(1, '张三', '13800138000', '北京市', '市辖区', '朝阳区', '建国路88号'),
(2, '李四', '13912345678', '上海市', '市辖区', '浦东新区', '陆家嘴环路100号'),
(3, '王五', '13698765432', '广东省', '深圳市', '南山区', '科技园路1号');

-- Insert order data (3 orders)
INSERT INTO `order_info` (`user_id`, `order_no`, `total_amount`, `order_status`, `payment_method`, `address_id`, `payment_time`) VALUES
(1, 'ORDER202311011001', 13998.00, 1, 'ALIPAY', 1, '2023-11-01 10:05:00'),
(2, 'ORDER202311021002', 12197.00, 1, 'WECHAT', 2, '2023-11-02 14:30:00'),
(3, 'ORDER202311031003', 199.80, 0, NULL, 3, NULL);

-- Insert order items
INSERT INTO `order_item` (`order_id`, `product_id`, `quantity`, `unit_price`) VALUES
-- Order 1 (2 iPhones)
(1, 1, 2, 6999.00),
-- Order 2 (1 TV + 2 books)
(2, 2, 1, 3299.00),
(2, 5, 2, 99.90),
-- Order 3 (2 pairs of sneakers)
(3, 4, 2, 899.00);

-- Insert payment info (orders whose payment has completed)
INSERT INTO `payment_info` (`order_id`, `transaction_no`, `payment_status`, `payment_amount`, `payment_method`, `payment_time`) VALUES
(1, 'PAY20231101123456', 'SUCCESS', 13998.00, 'ALIPAY', '2023-11-01 10:05:00'),
(2, 'PAY20231102567890', 'SUCCESS', 12197.00, 'WECHAT', '2023-11-02 14:30:00');
--------------------------------------------------------------------------------
/tests/integration_tests/ddl.sql:
--------------------------------------------------------------------------------
-- User table
CREATE TABLE user (
    user_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, -- User ID
    username TEXT NOT NULL UNIQUE, -- Username
    password_hash TEXT NOT NULL, -- Password hash
    mobile TEXT NOT NULL UNIQUE, -- Mobile number
    email TEXT, -- Email address
    created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, -- Creation time
    updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP -- Update time
); -- User information table

-- Product table
CREATE TABLE product (
    product_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, -- Product ID
    product_name TEXT NOT NULL, -- Product name
    description TEXT, -- Product description
    price REAL NOT NULL, -- Product unit price
    stock INTEGER NOT NULL DEFAULT 0, -- Stock quantity
    status INTEGER NOT NULL DEFAULT 1, -- Status (1: listed, 0: delisted)
    created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, -- Creation time
    updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP -- Update time
); -- Product information table

-- Order table
CREATE TABLE order_info (
    order_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, -- Order ID
    user_id INTEGER NOT NULL, -- User ID
    order_no TEXT NOT NULL UNIQUE, -- Order number (business-unique)
    total_amount REAL NOT NULL, -- Order total amount
    order_status INTEGER NOT NULL DEFAULT 0, -- Order status (0: pending payment, 1: paid, 2: shipped, 3: completed, 4: cancelled)
    payment_method TEXT, -- Payment method
    address_id INTEGER NOT NULL, -- Shipping address ID
    payment_time DATETIME, -- Payment time
    created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, -- Creation time
    updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, -- Update time
    FOREIGN KEY (user_id) REFERENCES user (user_id) ON DELETE RESTRICT ON UPDATE CASCADE,
    FOREIGN KEY (address_id) REFERENCES user_address (address_id) ON DELETE RESTRICT ON UPDATE CASCADE
); -- Order master table

-- Order item table
CREATE TABLE order_item (
    item_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, -- Item ID
    order_id INTEGER NOT NULL, -- Order ID
    product_id INTEGER NOT NULL, -- Product ID
    quantity INTEGER NOT NULL, -- Quantity purchased
    unit_price REAL NOT NULL, -- Unit price at time of sale
    total_price REAL AS (quantity * unit_price) STORED, -- Item total price
    FOREIGN KEY (order_id) REFERENCES order_info (order_id) ON DELETE CASCADE ON UPDATE CASCADE,
    FOREIGN KEY (product_id) REFERENCES product (product_id) ON DELETE RESTRICT ON UPDATE CASCADE
); -- Order item table

-- User address table
CREATE TABLE user_address (
    address_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, -- Address ID
    user_id INTEGER NOT NULL, -- User ID
    receiver TEXT NOT NULL, -- Receiver name
    phone TEXT NOT NULL, -- Contact phone
    province TEXT NOT NULL, -- Province
    city TEXT NOT NULL, -- City
    district TEXT NOT NULL, -- District
    detail_address TEXT NOT NULL, -- Detailed address
    is_default INTEGER NOT NULL DEFAULT 0, -- Whether this is the default address
    created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, -- Creation time
    updated_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, -- Update time
    FOREIGN KEY (user_id) REFERENCES user (user_id) ON DELETE CASCADE ON UPDATE CASCADE
); -- User address table

-- Payment information table
CREATE TABLE payment_info (
    payment_id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, -- Payment ID
    order_id INTEGER NOT NULL, -- Order ID
    transaction_no TEXT NOT NULL UNIQUE, -- Third-party transaction number
    payment_status TEXT NOT NULL, -- Payment status
    payment_amount REAL NOT NULL, -- Actual amount paid
    payment_method TEXT NOT NULL, -- Payment method
    payment_time DATETIME NOT NULL, -- Payment time
    created_at DATETIME NOT NULL DEFAULT CURRENT_TIMESTAMP, -- Creation time
    FOREIGN KEY (order_id) REFERENCES order_info (order_id) ON DELETE RESTRICT ON UPDATE CASCADE
); -- Payment information table
--------------------------------------------------------------------------------
/tests/integration_tests/query.md:
--------------------------------------------------------------------------------
1 | ## 请列出所有已支付订单的详细信息。
2 | SELECT o.order_no, o.total_amount, p.payment_status FROM order_info o JOIN payment_info p ON o.order_id = p.order_id WHERE o.order_status = 1;
3 |
4 | ## 哪些用户在最近一个月内下过订单?
5 | SELECT DISTINCT u.username FROM user u JOIN order_info o ON u.user_id = o.user_id WHERE o.created_at >= DATE('now', '-1 month');
6 |
7 | ## 我想知道每种支付方式的订单数量。
8 | SQL查询: SELECT payment_method, COUNT(*) AS order_count FROM order_info GROUP BY payment_method;
--------------------------------------------------------------------------------
/tests/integration_tests/test_database_agent.py:
--------------------------------------------------------------------------------
1 | import os
2 | import uuid
3 | from unittest import TestCase
4 |
5 | from camel.models import ModelFactory
6 | from camel.types import ModelPlatformType, ModelType
7 | from dotenv import load_dotenv
8 |
9 | from camel_database_agent import DatabaseAgent
10 | from camel_database_agent.database.manager import DatabaseManager
11 | from camel_database_agent.database_base import MessageLogToEmpty
12 |
13 | load_dotenv("../../.env") # isort:skip
14 |
15 |
class TestDatabaseAgent(TestCase):
    """End-to-end integration test: question answering over an in-memory SQLite DB."""

    database_manager: DatabaseManager

    @classmethod
    def setUpClass(cls) -> None:
        """Create an in-memory database and load the DDL/data fixtures once."""
        fixture_dir = os.path.dirname(os.path.abspath(__file__))
        cls.database_manager = DatabaseManager(db_url="sqlite:///:memory:")
        for fixture in ("ddl.sql", "data.sql"):
            with open(os.path.join(fixture_dir, fixture), "r") as sql_file:
                cls.database_manager.execute(sql_file.read(), ignore_sql_check=True)

    def test_ask(self) -> None:
        """Train the agent, then ask a multi-table question in Chinese."""
        agent = DatabaseAgent(
            database_manager=self.database_manager,
            model=ModelFactory.create(
                model_platform=ModelPlatformType.DEFAULT,
                model_type=ModelType.DEFAULT,
            ),
            language="Chinese",
        )

        agent.train_knowledge(reset_train=True)
        agent.ask(
            session_id=str(uuid.uuid4()),
            question="查询最近30天内成功支付用户的订单详情,包括用户信息、订单信息、支付状态、订单明细以及收货地址",
            message_log=MessageLogToEmpty(),
        )
44 |
--------------------------------------------------------------------------------
/tests/integration_tests/test_database_knowledge.py:
--------------------------------------------------------------------------------
1 | import os
2 | from unittest import TestCase
3 |
4 | from camel.embeddings import OpenAIEmbedding
5 | from camel.models import ModelFactory
6 | from camel.types import ModelPlatformType, ModelType
7 | from dotenv import load_dotenv
8 |
9 | from camel_database_agent import DataQueryInferencePipeline
10 | from camel_database_agent.database.manager import DatabaseManager
11 | from camel_database_agent.database.schema import (
12 | DatabaseSchemaParse,
13 | )
14 | from camel_database_agent.knowledge.knowledge import DatabaseKnowledge
15 | from camel_database_agent.knowledge.knowledge_qdrant import (
16 | DatabaseKnowledgeQdrant,
17 | )
18 |
19 | load_dotenv("../../.env") # isort:skip
20 |
21 |
class TestDatabaseKnowledge(TestCase):
    """Integration tests for the Qdrant-backed database knowledge store."""

    parse: DatabaseSchemaParse
    knowledge: DatabaseKnowledge
    database_manager: DatabaseManager

    @classmethod
    def setUpClass(cls) -> None:
        """Wire a schema parser and a Qdrant knowledge store to an in-memory DB."""
        default_model = ModelFactory.create(
            model_platform=ModelPlatformType.DEFAULT,
            model_type=ModelType.DEFAULT,
        )
        cls.database_manager = DatabaseManager(db_url="sqlite:///:memory:")
        cls.parse = DatabaseSchemaParse(
            database_manager=cls.database_manager,
            model=default_model,
        )
        cls.knowledge = DatabaseKnowledgeQdrant(embedding=OpenAIEmbedding(), model=default_model)

    def test_qdrant_with_ddl(self) -> None:
        """Parsed DDL records should be retrievable via similarity search."""
        fixture_dir = os.path.dirname(os.path.abspath(__file__))
        with open(os.path.join(fixture_dir, "ddl.sql"), "r") as ddl_file:
            self.knowledge.add(self.parse.parse_ddl_record(ddl_file.read()).data)

        ddl_records = self.knowledge.query_ddl("查询用户表中的所有用户信息", top_k=2)
        assert len(ddl_records) == 2

    def test_qdrant_with_query(self) -> None:
        """Generated question/SQL pairs should be stored and sampled back."""
        fixture_dir = os.path.dirname(os.path.abspath(__file__))
        with open(os.path.join(fixture_dir, "ddl.sql"), "r") as ddl_file:
            ddl_sql = ddl_file.read()
            self.database_manager.execute(ddl_sql, ignore_sql_check=True)
        with open(os.path.join(fixture_dir, "data.sql"), "r") as data_file:
            data_sql = data_file.read()
            self.database_manager.execute(data_sql, ignore_sql_check=True)

        pipeline = DataQueryInferencePipeline(
            ddl_sql=ddl_sql,
            data_sql=data_sql,
            model=ModelFactory.create(
                model_platform=ModelPlatformType.DEFAULT,
                model_type=ModelType.DEFAULT,
            ),
            database_manager=self.database_manager,
        )
        self.knowledge.add(records=pipeline.generate(10).data)
        assert len(self.knowledge.get_query_collection_sample(5)) == 5
69 |
--------------------------------------------------------------------------------
/tests/integration_tests/test_database_manager.py:
--------------------------------------------------------------------------------
1 | import os
2 | from unittest import TestCase
3 |
4 | from dotenv import load_dotenv
5 |
6 | from camel_database_agent.database.manager import DatabaseManager, SQLExecutionError
7 |
8 | load_dotenv() # isort:skip
9 |
10 |
class TestDatabaseManager(TestCase):
    """Integration tests for DatabaseManager error reporting."""

    database_manager: DatabaseManager

    @classmethod
    def setUpClass(cls) -> None:
        """Load the schema and sample rows into an in-memory SQLite database."""
        cls.database_manager = DatabaseManager(db_url="sqlite:///:memory:")
        fixture_dir = os.path.dirname(os.path.abspath(__file__))
        for fixture in ("ddl.sql", "data.sql"):
            with open(os.path.join(fixture_dir, fixture), "r") as sql_file:
                cls.database_manager.execute(sql_file.read(), ignore_sql_check=True)

    def test_select_with_fail(self) -> None:
        """Selecting from a missing table raises SQLExecutionError carrying the SQL text."""
        bad_sql = "select * from no_exist_table"
        with self.assertRaises(SQLExecutionError) as context:
            self.database_manager.select(bad_sql)
        assert context.exception.sql == bad_sql
28 |
--------------------------------------------------------------------------------
/tests/integration_tests/test_database_schema_dialect.py:
--------------------------------------------------------------------------------
1 | import os
2 | from unittest import TestCase
3 |
4 | from camel.models import ModelFactory
5 | from camel.types import ModelPlatformType, ModelType
6 | from colorama import Fore
7 | from dotenv import load_dotenv
8 |
9 | from camel_database_agent.database.dialect.dialect import (
10 | DatabaseSchemaDialect,
11 | )
12 | from camel_database_agent.database.dialect.dialect_sqlite import (
13 | DatabaseSchemaDialectSqlite,
14 | )
15 | from camel_database_agent.database.manager import DatabaseManager
16 |
17 | load_dotenv("../../.env") # isort:skip
18 |
19 |
class TestDatabaseSchemaDialect(TestCase):
    """Integration tests for the SQLite schema dialect implementation."""

    dialect: DatabaseSchemaDialect

    @classmethod
    def setUpClass(cls) -> None:
        """Populate an in-memory database and resolve its dialect handler."""
        default_model = ModelFactory.create(
            model_platform=ModelPlatformType.DEFAULT,
            model_type=ModelType.DEFAULT,
        )
        manager = DatabaseManager(db_url="sqlite:///:memory:")
        fixture_dir = os.path.dirname(os.path.abspath(__file__))
        for fixture in ("ddl.sql", "data.sql"):
            with open(os.path.join(fixture_dir, fixture), "r") as sql_file:
                manager.execute(sql_file.read(), ignore_sql_check=True)

        DatabaseSchemaDialect.register(DatabaseSchemaDialectSqlite)

        cls.dialect = DatabaseSchemaDialect.get_dialect(
            dialect_name=manager.dialect_name(),
            database_manager=manager,
            model=default_model,
        )

    def test_get_schema(self) -> None:
        """Both the raw and the polished schema are produced, and they differ."""
        raw_schema = self.dialect.get_schema()
        print(Fore.GREEN + raw_schema)
        assert raw_schema is not None

        polished_schema = self.dialect.get_polished_schema()
        print(Fore.BLUE + polished_schema)
        assert polished_schema is not None
        assert raw_schema != polished_schema

    def test_sampled_data(self) -> None:
        """Sampled-data SQL is produced for the loaded tables."""
        sample_sql = self.dialect.get_sampled_data()
        print(Fore.GREEN + sample_sql)
        assert sample_sql is not None
58 |
--------------------------------------------------------------------------------
/tests/integration_tests/test_database_schema_parse.py:
--------------------------------------------------------------------------------
1 | import os
2 | from unittest import TestCase
3 |
4 | from camel.models import ModelFactory
5 | from camel.types import ModelPlatformType, ModelType
6 | from dotenv import load_dotenv
7 |
8 | from camel_database_agent.database.manager import DatabaseManager
9 | from camel_database_agent.database.schema import (
10 | DatabaseSchemaParse,
11 | SchemaParseResponse,
12 | )
13 |
14 | load_dotenv("../../.env") # isort:skip
15 |
16 |
class TestDatabaseSchemaParse(TestCase):
    """Integration tests for parsing DDL, DML and query fixtures into records."""

    parse: DatabaseSchemaParse

    @classmethod
    def setUpClass(cls) -> None:
        """Build a parser over an empty in-memory SQLite database."""
        cls.parse = DatabaseSchemaParse(
            database_manager=DatabaseManager(db_url="sqlite:///:memory:"),
            model=ModelFactory.create(
                model_platform=ModelPlatformType.DEFAULT,
                model_type=ModelType.DEFAULT,
            ),
        )

    def _read_fixture(self, filename: str) -> str:
        """Return the text of a fixture file located next to this test module."""
        fixture_dir = os.path.dirname(os.path.abspath(__file__))
        with open(os.path.join(fixture_dir, filename), "r") as f:
            return f.read()

    def test_parse_ddl_record(self) -> None:
        """The DDL fixture parses into one record per table (6 tables)."""
        response: SchemaParseResponse = self.parse.parse_ddl_record(self._read_fixture("ddl.sql"))
        assert len(response.data) == 6

    def test_parse_dml_record(self) -> None:
        """The sampled-data fixture parses into 6 records."""
        response: SchemaParseResponse = self.parse.parse_sampled_record(
            self._read_fixture("data.sql")
        )
        assert len(response.data) == 6

    def test_parse_query_record(self) -> None:
        """The query fixture parses into 3 question/SQL records."""
        response: SchemaParseResponse = self.parse.parse_query_record(
            self._read_fixture("query.md")
        )
        assert len(response.data) == 3
48 |
--------------------------------------------------------------------------------
/tests/integration_tests/test_sql_query_inference_pipeline.py:
--------------------------------------------------------------------------------
1 | import os
2 | from unittest import TestCase
3 |
4 | from camel.models import ModelFactory
5 | from camel.types import ModelPlatformType, ModelType
6 | from dotenv import load_dotenv
7 |
8 | from camel_database_agent import DataQueryInferencePipeline
9 | from camel_database_agent.database.manager import DatabaseManager
10 |
11 | load_dotenv("../../.env") # isort:skip
12 |
13 |
class TestDataQueryInferencePipeline(TestCase):
    """Integration test for the question/SQL inference pipeline."""

    pipeline: DataQueryInferencePipeline

    @classmethod
    def setUpClass(cls) -> None:
        """Load fixtures into an in-memory database and build the pipeline."""
        manager = DatabaseManager(db_url="sqlite:///:memory:")
        fixture_dir = os.path.dirname(os.path.abspath(__file__))
        with open(os.path.join(fixture_dir, "ddl.sql"), "r") as ddl_file:
            ddl_sql = ddl_file.read()
        manager.execute(ddl_sql, ignore_sql_check=True)
        with open(os.path.join(fixture_dir, "data.sql"), "r") as data_file:
            data_sql = data_file.read()
        manager.execute(data_sql, ignore_sql_check=True)

        cls.pipeline = DataQueryInferencePipeline(
            ddl_sql=ddl_sql,
            data_sql=data_sql,
            model=ModelFactory.create(
                model_platform=ModelPlatformType.DEFAULT,
                model_type=ModelType.DEFAULT,
            ),
            database_manager=manager,
        )

    def test_generate(self) -> None:
        """Requesting 10 samples yields exactly 10 generated records."""
        assert len(self.pipeline.generate(10).data) == 10
41 |
--------------------------------------------------------------------------------
/tests/unit_tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/coolbeevip/camel-database-agent/0557a4429123f4f7f9ab755be0891b1f3fc9e89d/tests/unit_tests/__init__.py
--------------------------------------------------------------------------------