├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature-mindsdb-request.md ├── PULL_REQUEST_TEMPLATE │ └── pull_request_template.md └── workflows │ ├── add_to_pr_review.yml │ ├── add_to_roadmap_project_v2.yml │ ├── cla.yml │ ├── docs.yml │ ├── release.yml │ └── test_prs.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── assets └── contributions-agreement │ └── cla.json ├── docs ├── Makefile ├── make.bat ├── requirements.txt └── source │ ├── agents.rst │ ├── conf.py │ ├── connection.rst │ ├── database.rst │ ├── handlers.rst │ ├── index.rst │ ├── jobs.rst │ ├── knowledge_bases.rst │ ├── ml_engines.rst │ ├── model.rst │ ├── project.rst │ ├── query.rst │ ├── server.rst │ ├── skills.rst │ ├── tables.rst │ └── views.rst ├── examples ├── data │ └── tokaido-rulebook.pdf ├── home_rentals.py ├── using_agents.py ├── using_agents_with_retrieval.py ├── using_agents_with_streaming_with_retrieval.py ├── using_agents_with_text2sql.py ├── using_agents_with_text2sql_streaming.py ├── using_database_mind_text2sql.py ├── using_openai.py └── working_with_tables.py ├── mindsdb_sdk ├── __about__.py ├── __init__.py ├── agents.py ├── connect.py ├── connectors │ ├── __init__.py │ └── rest_api.py ├── databases.py ├── handlers.py ├── jobs.py ├── knowledge_bases.py ├── ml_engines.py ├── models.py ├── projects.py ├── query.py ├── server.py ├── skills.py ├── tables.py ├── utils │ ├── __init__.py │ ├── agents.py │ ├── context.py │ ├── mind.py │ ├── objects_collection.py │ ├── openai.py │ ├── sql.py │ └── table_schema.py └── views.py ├── requirements.txt ├── requirements_test.txt ├── setup.py └── tests ├── __init__.py ├── test_agent_stream_process.py ├── test_openai.py └── test_sdk.py /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | labels: 5 | --- 6 | 7 | **Your Environment** 8 | 9 | * 
Python version: 10 | * Operating system: 11 | * Mindsdb Python SDK version: 12 | * Additional info if applicable: 13 | 14 | **Please describe your issue and how we can replicate it** 15 | 16 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-mindsdb-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature Mindsdb Python SDK request 3 | about: Suggest an idea for this project 4 | labels: 5 | --- 6 | 7 | **Is your feature request related to a problem? Please describe.** 8 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 9 | 10 | **Describe the solution you'd like** 11 | A clear and concise description of what you want to happen. 12 | 13 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE/pull_request_template.md: -------------------------------------------------------------------------------- 1 | Fixes # 2 | 3 | ## Please describe what changes you made in as much detail as possible 4 | - 5 | 6 | -------------------------------------------------------------------------------- /.github/workflows/add_to_pr_review.yml: -------------------------------------------------------------------------------- 1 | name: Add Pull Requests to PR review project 2 | 3 | on: 4 | pull_request: 5 | types: 6 | - opened 7 | 8 | jobs: 9 | add-to-project: 10 | name: Add issue to project 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/add-to-project@v0.5.0 14 | with: 15 | project-url: https://github.com/orgs/mindsdb/projects/65 16 | github-token: ${{ secrets.ADD_TO_PROJECT_PAT }} 17 | -------------------------------------------------------------------------------- /.github/workflows/add_to_roadmap_project_v2.yml: -------------------------------------------------------------------------------- 1 | name: Add issue to roadmap project 2 | on: 3 | issues: 4 
| types: 5 | - opened 6 | jobs: 7 | add-to-project: 8 | name: Add issue to roadmap project 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/add-to-project@v0.4.0 12 | with: 13 | project-url: https://github.com/orgs/mindsdb/projects/53 14 | github-token: ${{ secrets.ADD_TO_PROJECT_PAT }} -------------------------------------------------------------------------------- /.github/workflows/cla.yml: -------------------------------------------------------------------------------- 1 | name: "MindsDB CLA Assistant" 2 | on: 3 | issue_comment: 4 | types: [created] 5 | pull_request_target: 6 | types: [opened,closed,synchronize] 7 | 8 | permissions: 9 | actions: write 10 | contents: write 11 | pull-requests: write 12 | statuses: write 13 | 14 | jobs: 15 | CLAssistant: 16 | runs-on: mdb-dev 17 | steps: 18 | - name: "CLA Assistant" 19 | if: (github.event.comment.body == 'recheck' || github.event.comment.body == 'I have read the CLA Document and I hereby sign the CLA') || github.event_name == 'pull_request_target' 20 | uses: contributor-assistant/github-action@v2.6.1 21 | env: 22 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 23 | with: 24 | path-to-signatures: 'assets/contributions-agreement/cla.json' 25 | path-to-document: 'https://github.com/mindsdb/mindsdb/blob/main/assets/contributions-agreement/individual-contributor.md' 26 | branch: 'cla' 27 | allowlist: bot*, ZoranPandovski, torrmal, Stpmax, mindsdbadmin, ea-rus, tmichaeldb, dusvyat, hamishfagg, MinuraPunchihewa, martyna-mindsdb, lucas-koontz 28 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: "Pull Request Docs Check" 2 | 3 | on: 4 | push: 5 | branches: 6 | - docs 7 | 8 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 9 | permissions: 10 | contents: read 11 | pages: write 12 | id-token: write 13 | 14 | # Allow only one concurrent 
deployment, skipping runs queued between the run in-progress and latest queued. 15 | # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 16 | concurrency: 17 | group: "pages" 18 | cancel-in-progress: false 19 | 20 | jobs: 21 | docs: 22 | runs-on: ubuntu-latest 23 | steps: 24 | - uses: actions/checkout@v1 25 | - uses: ammaraskar/sphinx-action@master 26 | with: 27 | docs-folder: "docs/" 28 | - name: Setup Pages 29 | uses: actions/configure-pages@v3 30 | - name: Upload artifact 31 | uses: actions/upload-pages-artifact@v1 32 | with: 33 | path: 'docs/build/html' 34 | - name: Deploy to GitHub Pages 35 | id: deployment 36 | uses: actions/deploy-pages@v2 37 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | test: 9 | runs-on: ${{ matrix.os }} 10 | strategy: 11 | matrix: 12 | os: [ubuntu-latest] 13 | python-version: ['3.8', '3.9', '3.10', '3.11'] 14 | steps: 15 | - uses: actions/checkout@v2 16 | - name: Set up Python ${{ matrix.python-version }} 17 | uses: actions/setup-python@v2 18 | with: 19 | python-version: ${{ matrix.python-version }} 20 | - name: Install dependencies 21 | run: | 22 | python -m pip install --upgrade pip==22.0.4 23 | pip install -r requirements.txt 24 | pip install -r requirements_test.txt 25 | pip install --no-cache-dir . 
26 | - name: Run tests 27 | run: | 28 | if [ "$RUNNER_OS" == "Linux" ]; then 29 | 30 | env PYTHONPATH=./ pytest tests/ 31 | 32 | fi 33 | shell: bash 34 | 35 | 36 | deploy: 37 | runs-on: ubuntu-latest 38 | needs: test 39 | steps: 40 | - uses: actions/checkout@v2 41 | - name: Set up Python 42 | uses: actions/setup-python@v2 43 | with: 44 | python-version: '3.9' 45 | - name: Install dependencies 46 | run: | 47 | python -m pip install --upgrade pip==20.2.4 48 | pip install setuptools wheel twine 49 | - name: Build and publish 50 | env: 51 | TWINE_USERNAME: __token__ 52 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 53 | run: | 54 | python setup.py sdist 55 | twine upload dist/* 56 | -------------------------------------------------------------------------------- /.github/workflows/test_prs.yml: -------------------------------------------------------------------------------- 1 | name: PR workflow 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | test: 10 | runs-on: ${{ matrix.os }} 11 | strategy: 12 | matrix: 13 | os: [ubuntu-latest] 14 | python-version: ['3.8', '3.9', '3.10', '3.11'] 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Set up Python ${{ matrix.python-version }} 18 | uses: actions/setup-python@v2 19 | with: 20 | python-version: ${{ matrix.python-version }} 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip==22.0.4 24 | pip install -r requirements.txt 25 | pip install -r requirements_test.txt 26 | pip install --no-cache-dir . 
27 | - name: Run tests 28 | run: | 29 | if [ "$RUNNER_OS" == "Linux" ]; then 30 | 31 | env PYTHONPATH=./ pytest tests/ 32 | 33 | fi 34 | shell: bash 35 | 36 | coverage: 37 | needs: test 38 | if: github.ref != 'refs/heads/stable' 39 | runs-on: ubuntu-latest 40 | permissions: 41 | pull-requests: write 42 | steps: 43 | - uses: actions/checkout@v3 44 | - name: Set up Python 3.8 45 | uses: actions/setup-python@v2 46 | with: 47 | python-version: 3.8 48 | 49 | - name: Install dependencies 50 | run: | 51 | python -m pip install --upgrade pip 52 | pip install flake8 53 | pip install -r requirements.txt 54 | pip install -r requirements_test.txt 55 | 56 | - name: Build coverage file 57 | run: | 58 | pytest --junitxml=pytest.xml --cov-report=term-missing:skip-covered --cov=mindsdb_sdk tests/ | tee pytest-coverage.txt 59 | 60 | - name: Pytest coverage comment 61 | uses: MishaKav/pytest-coverage-comment@main 62 | with: 63 | pytest-coverage-path: ./pytest-coverage.txt 64 | junitxml-path: ./pytest.xml 65 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # Distribution / packaging 7 | .Python 8 | env/ 9 | build/ 10 | develop-eggs/ 11 | dist/ 12 | downloads/ 13 | eggs/ 14 | .eggs/ 15 | lib/ 16 | lib64/ 17 | parts/ 18 | sdist/ 19 | var/ 20 | wheels/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | share/python-wheels/ 25 | 26 | # visual studio code 27 | .DStore 28 | .DS_Store 29 | .idea 30 | .vscode 31 | 32 | # virtualenv 33 | .venv 34 | venv/ 35 | ENV/ 36 | 37 | # pyenv 38 | .python-version 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | results 44 | mlp_img 45 | 46 | tests/home_rentals.csv 47 | tests/credentials.txt 48 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: 
-------------------------------------------------------------------------------- 1 | 2 | # Contributor Covenant Code of Conduct 3 | 4 | ## Our Pledge 5 | 6 | We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. 7 | 8 | We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community. 9 | 10 | ## Our Standards 11 | 12 | Examples of behavior that contributes to a positive environment for our community include: 13 | 14 | * Demonstrating empathy and kindness toward other people 15 | * Being respectful of differing opinions, viewpoints, and experiences 16 | * Giving and gracefully accepting constructive feedback 17 | * Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience 18 | * Focusing on what is best not just for us as individuals, but for the overall community 19 | 20 | Examples of unacceptable behavior include: 21 | 22 | * The use of sexualized language or imagery, and sexual attention or 23 | advances of any kind 24 | * Trolling, insulting or derogatory comments, and personal or political attacks 25 | * Public or private harassment 26 | * Publishing others' private information, such as a physical or email 27 | address, without their explicit permission 28 | * Other conduct which could reasonably be considered inappropriate in a 29 | professional setting 30 | 31 | ## Enforcement Responsibilities 32 | 33 | Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, 
threatening, offensive, or harmful. 34 | 35 | Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate. 36 | 37 | ## Scope 38 | 39 | This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. 40 | 41 | ## Enforcement 42 | 43 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at admin@mindsdb.com. All complaints will be reviewed and investigated promptly and fairly. 44 | 45 | All community leaders are obligated to respect the privacy and security of the reporter of any incident. 46 | 47 | 48 | ## Attribution 49 | 50 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.0, 51 | available at https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 52 | 53 | Community Impact Guidelines were inspired by [Mozilla's code of conduct enforcement ladder](https://github.com/mozilla/diversity). 54 | 55 | [homepage]: https://www.contributor-covenant.org 56 | 57 | For answers to common questions about this code of conduct, see the FAQ at 58 | https://www.contributor-covenant.org/faq. Translations are available at https://www.contributor-covenant.org/translations. 
59 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Mindsdb 2 | 3 | Being part of the core MindsDB team is accessible to anyone who is motivated and wants to be part of that journey! 4 | 5 | Please see below how to contribute to the project, also refer to the contributing documentation. 6 | 7 | ## How can you help us? 8 | 9 | * Report a bug 10 | * Improve documentation 11 | * Discuss the code implementation 12 | * Submit a bug fix 13 | * Propose new features 14 | * Test Mindsdb 15 | 16 | ## Code contributions 17 | 18 | In general, we follow the "fork-and-pull" Git workflow. 19 | 1. Fork the Mindsdb repository 20 | 2. Clone the repository 21 | 3. Make changes and commit them 22 | 4. Push your local branch to your fork 23 | 5. Submit a Pull request so that we can review your changes 24 | 6. Write a commit message 25 | 7. Make sure that the CI tests are GREEN 26 | 27 | > NOTE: Be sure to merge the latest from "upstream" before making a pull request! Also, make the PR to the staging branch. 28 | 29 | ## Feature and Bug reports 30 | We use GitHub issues to track bugs and features. Report them by opening a [new issue](https://github.com/mindsdb/mindsdb_python_sdk/issues/new/choose) and fill out all of the required inputs. 31 | 32 | ## Code review process 33 | 34 | The Pull Request reviews are done on a regular basis. Please, make sure you respond to our feedback/questions. 35 | 36 | ## Community 37 | 38 | If you have additional questions or you want to chat with the MindsDB core team, please join our [Slack community](https://mindsdb.com/joincommunity) or post at [Github Discussions](https://github.com/mindsdb/mindsdb_python_sdk/discussions). 
39 | 40 | To get updates on MindsDB’s latest announcements, releases, and events, sign up for our [Monthly Community Newsletter](https://mindsdb.com/newsletter/?utm_medium=community&utm_source=github&utm_campaign=mindsdb%20repo). 41 | 42 | Join our mission of democratizing machine learning! 43 | 44 | ## Contributor Code of Conduct 45 | 46 | Please note that this project is released with a [Contributor Code of Conduct](https://github.com/mindsdb/mindsdb_python_sdk/blob/stable/CODE_OF_CONDUCT.md). By participating in this project, you agree to abide by its terms. 47 | 48 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 MindsDB Inc 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements.txt 2 | prune tests* 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python MindsDB SDK 2 | 3 | The Python MindsDB SDK allows you to connect to a MindsDB server from Python using the HTTP API. 4 | 5 | ## Installation 6 | 7 | ``` 8 | pip install mindsdb_sdk 9 | ``` 10 | 11 | ## Example 12 | 13 | ### Connecting to the MindsDB server 14 | 15 | You can establish a connection to the MindsDB server using the SDK. Here are some examples: 16 | 17 | #### Connect to a local MindsDB server 18 | 19 | ```python 20 | import mindsdb_sdk 21 | con = mindsdb_sdk.connect() 22 | con = mindsdb_sdk.connect('http://127.0.0.1:47334') 23 | ``` 24 | 25 | #### Connect to the MindsDB Cloud 26 | 27 | ```python 28 | import mindsdb_sdk 29 | con = mindsdb_sdk.connect(login='a@b.com', password='-') 30 | con = mindsdb_sdk.connect('https://cloud.mindsdb.com', login='a@b.com', password='-') 31 | ``` 32 | 33 | #### Connect to a MindsDB Pro server 34 | 35 | ```python 36 | import mindsdb_sdk 37 | con = mindsdb_sdk.connect('http://', login='a@b.com', password='-', is_managed=True) 38 | ``` 39 | 40 | ## Basic usage 41 | 42 | Once connected to the server, you can perform various operations. 
Here are some examples: 43 | 44 | ```python 45 | # Get a list of databases 46 | databases = con.databases.list() 47 | 48 | # Get a specific database 49 | database = databases[0] # Database type object 50 | 51 | # Perform an SQL query 52 | query = database.query('select * from table1') 53 | print(query.fetch()) 54 | 55 | # Create a table 56 | table = database.tables.create('table2', query) 57 | 58 | # Get a project 59 | project = con.projects.proj 60 | 61 | # or use mindsdb project 62 | project = con 63 | 64 | # Perform an SQL query within a project 65 | query = project.query('select * from database.table join model1') 66 | 67 | # Create a view 68 | view = project.views.create('view1', query=query) 69 | 70 | # Get a list of views 71 | views = project.views.list() 72 | view = views[0] 73 | df = view.fetch() 74 | 75 | # Get a list of models 76 | models = project.models.list() 77 | model = models[0] 78 | 79 | # Use a model for prediction 80 | result_df = model.predict(df) 81 | result_df = model.predict(query) 82 | 83 | # Create a model 84 | timeseries_options = { 85 | 'order': 'date', 86 | 'window': 5, 87 | 'horizon': 1 88 | } 89 | model = project.models.create( 90 | 'rentals_model', 91 | predict='price', 92 | query=query, 93 | timeseries_options=timeseries_options 94 | ) 95 | 96 | # Describe a model 97 | model.describe() 98 | ``` 99 | 100 | You can find more examples in this [Google colab notebook]( 101 | https://colab.research.google.com/drive/1QouwAR3saFb9ffthrIs1LSH5COzyQa11#scrollTo=k6IbwsKRPQCR 102 | ) 103 | 104 | ## Examples 105 | 106 | https://github.com/mindsdb/mindsdb_python_sdk/tree/staging/examples 107 | 108 | ## API Documentation 109 | 110 | The API documentation for the MindsDB SDK can be found at https://mindsdb.github.io/mindsdb_python_sdk/. 
111 | 112 | ### Generating API docs locally: 113 | 114 | ```commandline 115 | cd docs 116 | pip install -r requirements.txt 117 | make html 118 | ``` 119 | 120 | The online documentation is automatically updated by pushing changes to the docs branch. 121 | 122 | 123 | ## Testing 124 | 125 | To run all the tests for the components, use the following command: 126 | 127 | ```bash 128 | env PYTHONPATH=./ pytest 129 | ``` 130 | 131 | ## Contributing 132 | 133 | We welcome contributions to the MindsDB SDK. If you'd like to contribute, please refer to the contribution guidelines for more information. 134 | 135 | ## License 136 | 137 | The MindsDB SDK is licensed under the MIT License. Feel free to use and modify it according to your needs 138 | 139 | -------------------------------------------------------------------------------- /assets/contributions-agreement/cla.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindsdb/mindsdb_python_sdk/c8c51f2597f04d30e6d16b798a304153e7bd9233/assets/contributions-agreement/cla.json -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | pandas >= 1.3.5 3 | mindsdb-sql >= 0.7.0, < 0.8.0 4 | 5 | sphinx 6 | sphinx-rtd-theme 7 | -------------------------------------------------------------------------------- /docs/source/agents.rst: -------------------------------------------------------------------------------- 1 | Agents 2 | ------------------------- 3 | 4 | .. 
automodule:: mindsdb_sdk.agents 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ----------------------------------------------------- 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | import os 9 | import sys 10 | sys.path.insert(0, os.path.abspath(os.path.join('..', '..'))) 11 | 12 | project = 'Mindsdb python SDK' 13 | copyright = '2023, MindsDB Inc' 14 | author = 'MindsDB Inc' 15 | 16 | # -- General configuration --------------------------------------------------- 17 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 18 | 19 | extensions = [ 20 | 'sphinx.ext.autodoc', 21 | 'sphinx.ext.napoleon', 22 | 'sphinx.ext.autosectionlabel' 23 | ] 24 | 25 | templates_path = ['_templates'] 26 | exclude_patterns = [] 27 | 28 | 29 | 30 | # -- Options for HTML output ------------------------------------------------- 31 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 32 | 33 | html_theme = 'sphinx_rtd_theme' 34 | html_static_path = ['_static'] 35 | -------------------------------------------------------------------------------- /docs/source/connection.rst: -------------------------------------------------------------------------------- 1 | Connection to mindsdb server 2 | ---------------------------- 3 | 4 | .. 
automodule:: mindsdb_sdk.connect 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/database.rst: -------------------------------------------------------------------------------- 1 | Databases 2 | ---------------------------- 3 | 4 | .. automodule:: mindsdb_sdk.databases 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: -------------------------------------------------------------------------------- /docs/source/handlers.rst: -------------------------------------------------------------------------------- 1 | Handlers 2 | ------------------------- 3 | 4 | .. automodule:: mindsdb_sdk.handlers 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to Mindsdb python SDK documentation! 2 | ============================================ 3 | 4 | Getting Started 5 | =============== 6 | 7 | Source code 8 | ----------- 9 | 10 | ``_ 11 | 12 | Installation 13 | ------------ 14 | 15 | .. code-block:: console 16 | 17 | pip install mindsdb_sdk 18 | 19 | Connect 20 | ------- 21 | 22 | .. code-block:: python 23 | 24 | import mindsdb_sdk 25 | 26 | # Connect to local server 27 | 28 | server = mindsdb_sdk.connect() 29 | server = mindsdb_sdk.connect('http://127.0.0.1:47334') 30 | 31 | # Connect to cloud server 32 | 33 | server = mindsdb_sdk.connect(email='a@b.com', password='-') 34 | server = mindsdb_sdk.connect('https://cloud.mindsdb.com', login='a@b.com', password='-') 35 | 36 | # Connect to MindsDB Pro 37 | 38 | server = mindsdb_sdk.connect('http://', login='a@b.com', password='-', is_managed=True) 39 | 40 | Base usage 41 | ---------- 42 | 43 | .. 
code-block:: python 44 | 45 | # database 46 | databases = server.list_databases() 47 | 48 | database = databases[0] # Database type object 49 | 50 | # sql query 51 | query = database.query('select * from table1') 52 | print(query.fetch()) 53 | 54 | # create table 55 | table = database.create_table('table2', query) 56 | 57 | 58 | # project 59 | project = server.get_project('proj') 60 | 61 | # sql query 62 | query = project.query('select * from database.table join model1') 63 | 64 | # create view 65 | view = project.create_view( 66 | 'view1', 67 | query=query 68 | ) 69 | 70 | # get view 71 | views = project.list_views() 72 | view = views[0] 73 | df = view.fetch() 74 | 75 | # get model 76 | models = project.list_models() 77 | model = models[0] 78 | 79 | # using model 80 | result_df = model.predict(df) 81 | result_df = model.predict(query) 82 | 83 | # create model 84 | model = project.create_model( 85 | 'rentals_model', 86 | predict='price', 87 | query=query, 88 | ) 89 | 90 | More 91 | 92 | More examples 93 | ------------- 94 | 95 | ``_ 96 | 97 | API documentation 98 | ================= 99 | 100 | .. toctree:: 101 | :maxdepth: 1 102 | :caption: Connection: 103 | 104 | connection 105 | 106 | .. toctree:: 107 | :maxdepth: 1 108 | :caption: Modules: 109 | 110 | server 111 | database 112 | 113 | project 114 | handlers 115 | 116 | ml_engines 117 | model 118 | tables 119 | views 120 | query 121 | jobs 122 | 123 | knowledge_bases 124 | skills 125 | agents 126 | 127 | 128 | Indices and tables 129 | ------------------ 130 | 131 | * :ref:`genindex` 132 | * :ref:`modindex` 133 | * :ref:`search` -------------------------------------------------------------------------------- /docs/source/jobs.rst: -------------------------------------------------------------------------------- 1 | Jobs 2 | ------------------------- 3 | 4 | .. _my-reference-label: 5 | 6 | 7 | .. 
automodule:: mindsdb_sdk.jobs 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: -------------------------------------------------------------------------------- /docs/source/knowledge_bases.rst: -------------------------------------------------------------------------------- 1 | Knowledge bases 2 | ------------------------- 3 | 4 | .. automodule:: mindsdb_sdk.knowledge_bases 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: -------------------------------------------------------------------------------- /docs/source/ml_engines.rst: -------------------------------------------------------------------------------- 1 | ML Engines 2 | ------------------------- 3 | 4 | .. automodule:: mindsdb_sdk.ml_engines 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: -------------------------------------------------------------------------------- /docs/source/model.rst: -------------------------------------------------------------------------------- 1 | Models 2 | ------------------------- 3 | 4 | .. automodule:: mindsdb_sdk.models 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: -------------------------------------------------------------------------------- /docs/source/project.rst: -------------------------------------------------------------------------------- 1 | Projects 2 | --------------------------- 3 | 4 | .. automodule:: mindsdb_sdk.projects 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/query.rst: -------------------------------------------------------------------------------- 1 | Query 2 | ------------------------- 3 | 4 | .. 
automodule:: mindsdb_sdk.query 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | -------------------------------------------------------------------------------- /docs/source/server.rst: -------------------------------------------------------------------------------- 1 | Server 2 | -------------------------- 3 | 4 | .. automodule:: mindsdb_sdk.server 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/skills.rst: -------------------------------------------------------------------------------- 1 | Skills 2 | ------------------------- 3 | 4 | .. automodule:: mindsdb_sdk.skills 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: -------------------------------------------------------------------------------- /docs/source/tables.rst: -------------------------------------------------------------------------------- 1 | Tables 2 | ------------------------- 3 | 4 | .. automodule:: mindsdb_sdk.tables 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: -------------------------------------------------------------------------------- /docs/source/views.rst: -------------------------------------------------------------------------------- 1 | Views 2 | ------------------------- 3 | 4 | .. 
automodule:: mindsdb_sdk.views 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: -------------------------------------------------------------------------------- /examples/data/tokaido-rulebook.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindsdb/mindsdb_python_sdk/c8c51f2597f04d30e6d16b798a304153e7bd9233/examples/data/tokaido-rulebook.pdf -------------------------------------------------------------------------------- /examples/home_rentals.py: -------------------------------------------------------------------------------- 1 | 2 | import mindsdb_sdk 3 | 4 | con = mindsdb_sdk.connect() 5 | 6 | # connect to database 7 | db = con.databases.create( 8 | 'example_db', 9 | engine='postgres', 10 | connection_args={ 11 | "user": "demo_user", 12 | "password": "demo_password", 13 | "host": "3.220.66.106", 14 | "port": "5432", 15 | "database": "demo" 16 | } 17 | ) 18 | 19 | # get table 20 | # because table with schema we are using .get 21 | tbl = db.tables.get('demo_data.home_rentals') 22 | 23 | # create model 24 | model = con.models.create( 25 | 'home_rentals_model', 26 | predict='rental_price', 27 | query=tbl 28 | ) 29 | 30 | # wait till training complete 31 | model.wait_complete() 32 | 33 | # make prediction for first 3 rows 34 | result = model.predict(tbl.limit(3)) 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /examples/using_agents.py: -------------------------------------------------------------------------------- 1 | import mindsdb_sdk 2 | 3 | con = mindsdb_sdk.connect() 4 | 5 | # We currently support Langchain as a backend. 6 | print('Creating underlying langchain model for the agent to use...') 7 | try: 8 | langchain_engine = con.ml_engines.get('langchain') 9 | except Exception: 10 | # Create the engine if it doesn't exist. 
11 | langchain_engine = con.ml_engines.create('langchain', handler='langchain') 12 | 13 | # Actually create the underlying model the agent will use. 14 | langchain_model = con.models.create( 15 | 'agent_model', 16 | predict='answer', 17 | mode='retrieval', # Use retrieval mode if using knowledge bases. 18 | engine='langchain', 19 | prompt_template='You are a spicy, cheeky assistant. Add some personality and flare when responding to the user question: {{question}}', 20 | model_name='gpt-4-0125-preview' # This is the underlying LLM. Can use OpenAI, Claude, local Ollama, etc 21 | # Can optionally set LLM args here. For example: 22 | # temperature=0.0, 23 | # max_tokens=1000, 24 | # top_p=1.0, 25 | # top_k=0, 26 | # ... 27 | ) 28 | print('Agent ready to use.') 29 | 30 | # Now create an agent that will use the model we just created. 31 | agent = con.agents.create('new_agent', langchain_model) 32 | print('Ask a question: ') 33 | question = input() 34 | answer = agent.completion([{'question': question, 'answer': None}]) 35 | print(answer.content) 36 | -------------------------------------------------------------------------------- /examples/using_agents_with_retrieval.py: -------------------------------------------------------------------------------- 1 | import mindsdb_sdk 2 | from uuid import uuid4 3 | import os 4 | 5 | con = mindsdb_sdk.connect() 6 | 7 | open_ai_key = os.getenv('OPENAI_API_KEY') 8 | model_name = 'gpt-4o' 9 | 10 | # Now create an agent that will use the model we just created. 11 | agent = con.agents.create(name=f'mindsdb_retrieval_agent_{model_name}_{uuid4().hex}', 12 | model=model_name, 13 | params={'return_context': True}) 14 | 15 | agent.add_file('./data/tokaido-rulebook.pdf', 'rule book for the board game Tokaido') 16 | 17 | question = "what are the rules for the game takaido?" 
18 | answer = agent.completion([{'question': question, 'answer': None}]) 19 | print(answer.context) 20 | print(answer) 21 | 22 | -------------------------------------------------------------------------------- /examples/using_agents_with_streaming_with_retrieval.py: -------------------------------------------------------------------------------- 1 | import mindsdb_sdk 2 | from uuid import uuid4 3 | import os 4 | 5 | con = mindsdb_sdk.connect() 6 | 7 | open_ai_key = os.getenv('OPENAI_API_KEY') 8 | model_name = 'gpt-4o' 9 | 10 | # Now create an agent that will use the model we just created. 11 | agent = con.agents.create(name=f'mindsdb_retrieval_agent_{model_name}_{uuid4().hex}', 12 | model=model_name, 13 | params={'return_context': True}) 14 | 15 | agent.add_file('./data/tokaido-rulebook.pdf', 'rule book for the board game Tokaido') 16 | 17 | question = "what are the rules for the game tokaido?" 18 | 19 | # Stream the completion 20 | completion_stream = agent.completion_stream([{'question': question, 'answer': None}]) 21 | 22 | # Process the streaming response 23 | full_response = "" 24 | for chunk in completion_stream: 25 | print(chunk) # Print the entire chunk for debugging 26 | if isinstance(chunk, dict): 27 | if 'output' in chunk: 28 | full_response += chunk['output'] 29 | elif isinstance(chunk, str): 30 | full_response += chunk 31 | 32 | print("\n\nFull response:") 33 | print(full_response) 34 | -------------------------------------------------------------------------------- /examples/using_agents_with_text2sql.py: -------------------------------------------------------------------------------- 1 | import mindsdb_sdk 2 | from uuid import uuid4 3 | import os 4 | 5 | con = mindsdb_sdk.connect() 6 | 7 | open_ai_key = os.getenv('OPENAI_API_KEY') 8 | model_name = 'gpt-4o' 9 | 10 | # Now create an agent that will use the model we just created. 
11 | agent = con.agents.create(name=f'mindsdb_sql_agent_{model_name}_{uuid4().hex}', 12 | model=model_name) 13 | 14 | 15 | # Set up a Postgres data source with our new agent. 16 | data_source = 'postgres' 17 | connection_args = { 18 | "user": "demo_user", 19 | "password": "demo_password", 20 | "host": "samples.mindsdb.com", 21 | "port": "5432", 22 | "database": "demo", 23 | "schema": "demo_data" 24 | } 25 | description = 'mindsdb demo database' 26 | database = con.databases.create( 27 | f'mindsdb_sql_agent_datasource_{uuid4().hex}', 28 | data_source, 29 | connection_args 30 | ) 31 | 32 | # Actually connect the agent to the datasource. 33 | agent.add_database(database.name, [], description) 34 | 35 | 36 | question = 'How many three-bedroom houses were sold in 2008?' 37 | answer = agent.completion([{'question': question, 'answer': None}]) 38 | print(answer.content) 39 | 40 | con.databases.drop(database.name) 41 | con.agents.drop(agent.name) 42 | -------------------------------------------------------------------------------- /examples/using_agents_with_text2sql_streaming.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import mindsdb_sdk 4 | from uuid import uuid4 5 | import os 6 | 7 | from mindsdb_sdk.utils.agents import MindsDBSQLStreamParser 8 | 9 | con = mindsdb_sdk.connect() 10 | 11 | open_ai_key = os.getenv('OPENAI_API_KEY') 12 | model_name = 'gpt-4o' 13 | 14 | # Now create an agent that will use the model we just created. 15 | agent = con.agents.create(name=f'mindsdb_sql_agent_{model_name}_{uuid4().hex}', 16 | model=model_name) 17 | 18 | # Set up a Postgres data source with our new agent. 
19 | data_source = 'postgres' 20 | connection_args = { 21 | "user": "demo_user", 22 | "password": "demo_password", 23 | "host": "samples.mindsdb.com", 24 | "port": "5432", 25 | "database": "demo", 26 | "schema": "demo_data" 27 | } 28 | description = 'mindsdb demo database' 29 | database = con.databases.create( 30 | f'mindsdb_sql_agent_datasource_{uuid4().hex}', 31 | data_source, 32 | connection_args 33 | ) 34 | 35 | # Actually connect the agent to the datasource. 36 | agent.add_database(database.name, [], description) 37 | 38 | question = 'How many three-bedroom houses were sold in 2008?' 39 | 40 | completion_stream = agent.completion_stream([{'question': question, 'answer': None}]) 41 | 42 | #default logging level is set to INFO, we can change it to DEBUG to see more detailed logs and get full agent steps 43 | mdb_parser = MindsDBSQLStreamParser() 44 | full_response, sql_query = mdb_parser.process_stream(completion_stream) 45 | 46 | con.databases.drop(database.name) 47 | con.agents.drop(agent.name) 48 | -------------------------------------------------------------------------------- /examples/using_database_mind_text2sql.py: -------------------------------------------------------------------------------- 1 | from uuid import uuid4 2 | 3 | from openai import OpenAI 4 | from mindsdb_sdk.utils.mind import create_mind, DatabaseConfig 5 | import os 6 | 7 | 8 | # Load MindsDB API key from environment variable. or set it here. 9 | MINDSDB_API_KEY = os.getenv('MINDSDB_API_KEY') 10 | 11 | # Set the base URL for the MindsDB LiteLLM proxy. 12 | base_url = 'https://llm.mdb.ai' 13 | 14 | 15 | # Connect to MindsDB LiteLLM proxy. 16 | client = OpenAI( 17 | api_key=MINDSDB_API_KEY, 18 | base_url=base_url 19 | ) 20 | 21 | # Create a Database Config. 
22 | pg_config = DatabaseConfig( 23 | description='House Sales', 24 | type='postgres', 25 | connection_args={ 26 | 'user': 'demo_user', 27 | 'password': 'demo_password', 28 | 'host': 'samples.mindsdb.com', 29 | 'port': '5432', 30 | 'database': 'demo', 31 | 'schema': 'demo_data' 32 | }, 33 | tables=['house_sales'] 34 | ) 35 | 36 | # create a database mind 37 | mind = create_mind( 38 | base_url= base_url, 39 | api_key= MINDSDB_API_KEY, 40 | name = f'my_house_data_mind_{uuid4().hex}', 41 | data_source_configs=[pg_config], 42 | ) 43 | 44 | # Actually pass in our tool to get a SQL completion. 45 | completion = client.chat.completions.create( 46 | model=mind.name, 47 | messages=[ 48 | {'role': 'user', 'content': 'How many 2 bedroom houses sold in 2008?'} 49 | ], 50 | stream=False 51 | ) 52 | 53 | print(completion.choices[0].message.content) 54 | -------------------------------------------------------------------------------- /examples/using_openai.py: -------------------------------------------------------------------------------- 1 | 2 | import mindsdb_sdk 3 | 4 | con = mindsdb_sdk.connect() 5 | 6 | openai_handler = con.ml_handlers.openai 7 | 8 | # create ml engine 9 | openai = con.ml_engines.create( 10 | 'openai', 11 | handler=openai_handler, 12 | # handler='openai', # <-- another option to define handler 13 | connection_data={'api_key': ''} 14 | ) 15 | 16 | # create model 17 | model = con.models.create( 18 | 'open1', 19 | predict='answer', 20 | engine=openai, # created ml engine 21 | prompt_template='answer question: {{q}}' 22 | ) 23 | 24 | # use model 25 | model.predict({'q': 'size of the sun'}) -------------------------------------------------------------------------------- /examples/working_with_tables.py: -------------------------------------------------------------------------------- 1 | import mindsdb_sdk 2 | import pandas as pd 3 | 4 | con = mindsdb_sdk.connect() 5 | 6 | # connect to mindsdb example database 7 | example_db = con.databases.create( 8 | 
'example_db', 9 | engine='postgres', 10 | connection_args={ 11 | "user": "demo_user", 12 | "password": "demo_password", 13 | "host": "3.220.66.106", 14 | "port": "5432", 15 | "database": "demo" 16 | } 17 | ) 18 | 19 | # connect to the empty user database 20 | my_db = con.databases.create( 21 | 'my_db', 22 | engine='postgres', 23 | connection_args={ 24 | "user": "postgres", 25 | "host": "localhost", 26 | "port": "5432", 27 | "database": "my_database" 28 | } 29 | ) 30 | 31 | # get home_rentals table 32 | table1 = example_db.tables.get('demo_data.home_rentals') 33 | 34 | # ---- create new table ---- 35 | 36 | # create table home_rentals in user db and fill it with rows with location=great 37 | table2 = my_db.tables.create('home_rentals', table1.filter(location='great')) 38 | 39 | 40 | # create table from csv file 41 | 42 | df = pd.read_csv('my_data.csv') 43 | table3 = my_db.tables.create('my_table', df) 44 | 45 | 46 | # ---- insert into table ---- 47 | 48 | # insert to table2 first 10 rows from table1 49 | table2.insert(table1.limit(10)) 50 | 51 | 52 | # ---- update data in table ---- 53 | 54 | # get all rows with number_of_rooms=1 from table1 and update values in table2 using key ('location', 'neighborhood') 55 | table2.update( 56 | table1.filter(number_of_rooms=1), 57 | on=['location', 'neighborhood'] 58 | ) 59 | 60 | 61 | # ---- delete rows from table ---- 62 | 63 | # delete all rows where bedrooms=2 64 | table2.delete(number_of_rooms=1) 65 | 66 | 67 | -------------------------------------------------------------------------------- /mindsdb_sdk/__about__.py: -------------------------------------------------------------------------------- 1 | __title__ = 'mindsdb_sdk' 2 | __package_name__ = 'mindsdb_sdk' 3 | __version__ = '3.4.3' 4 | __description__ = "MindsDB Python SDK, provides an SDK to use a remote mindsdb instance" 5 | __email__ = "jorge@mindsdb.com" 6 | __author__ = 'MindsDB Inc' 7 | __github__ = 'https://github.com/mindsdb/mindsdb_python_sdk' 8 | __pypi__ = 
'https://pypi.org/project/mindsdb-sdk/' 9 | __license__ = 'MIT' 10 | __copyright__ = 'Copyright 2020- mindsdb' 11 | -------------------------------------------------------------------------------- /mindsdb_sdk/__init__.py: -------------------------------------------------------------------------------- 1 | from mindsdb_sdk.connect import connect 2 | -------------------------------------------------------------------------------- /mindsdb_sdk/agents.py: -------------------------------------------------------------------------------- 1 | from requests.exceptions import HTTPError 2 | from typing import Iterable, List, Union 3 | from urllib.parse import urlparse 4 | from uuid import uuid4 5 | import datetime 6 | import json 7 | 8 | from mindsdb_sdk.knowledge_bases import KnowledgeBase 9 | from mindsdb_sdk.models import Model 10 | from mindsdb_sdk.skills import Skill 11 | from mindsdb_sdk.utils.objects_collection import CollectionBase 12 | 13 | _DEFAULT_LLM_MODEL = 'gpt-4o' 14 | _DEFAULT_LLM_PROMPT = 'Answer the user"s question in a helpful way: {{question}}' 15 | 16 | 17 | class AgentCompletion: 18 | """ 19 | Represents a full MindsDB agent completion response. 20 | 21 | Attributes: 22 | content: The completion content. 23 | context: Only relevant for retrieval agents. Contains the context retrieved from the knowledge base. 24 | 25 | 26 | """ 27 | 28 | def __init__(self, content: str, context: List[dict] = None): 29 | self.content = content 30 | self.context = context 31 | 32 | def __repr__(self): 33 | return f'{self.__class__.__name__}(content: {self.content}, context: {self.context})' 34 | 35 | 36 | class Agent: 37 | """Represents a MindsDB agent. 
38 | 39 | Working with agents: 40 | 41 | Get an agent by name: 42 | 43 | >>> agent = agents.get('my_agent') 44 | 45 | Query an agent: 46 | 47 | >>> completion = agent.completion([{'question': 'What is your name?', 'answer': None}]) 48 | >>> print(completion.content) 49 | 50 | Query an agent with streaming: 51 | 52 | >>> completion = agent.completion_stream([{'question': 'What is your name?', 'answer': None}]) 53 | >>> for chunk in completion: 54 | print(chunk.choices[0].delta.content) 55 | 56 | List all agents: 57 | 58 | >>> agents = agents.list() 59 | 60 | Create a new agent: 61 | 62 | >>> model = models.get('my_model') # Or use models.create(...) 63 | >>> # Connect your agent to a MindsDB table. 64 | >>> text_to_sql_skill = skills.create('text_to_sql', 'sql', { 'tables': ['my_table'], 'database': 'my_database' }) 65 | >>> agent = agents.create('my_agent', model, [text_to_sql_skill]) 66 | 67 | Update an agent: 68 | 69 | >>> new_model = models.get('new_model') 70 | >>> agent.model_name = new_model.name 71 | >>> new_skill = skills.create('new_skill', 'sql', { 'tables': ['new_table'], 'database': 'new_database' }) 72 | >>> updated_agent.skills.append(new_skill) 73 | >>> updated_agent = agents.update('my_agent', agent) 74 | 75 | Delete an agent by name: 76 | 77 | >>> agents.drop('my_agent') 78 | """ 79 | 80 | def __init__( 81 | self, 82 | name: str, 83 | model_name: str, 84 | skills: List[Skill], 85 | params: dict, 86 | created_at: datetime.datetime, 87 | updated_at: datetime.datetime, 88 | provider: str = None, 89 | collection: CollectionBase = None 90 | ): 91 | self.name = name 92 | self.model_name = model_name 93 | self.provider = provider 94 | self.skills = skills 95 | self.params = params 96 | self.created_at = created_at 97 | self.updated_at = updated_at 98 | self.collection = collection 99 | 100 | def completion(self, messages: List[dict]) -> AgentCompletion: 101 | return self.collection.completion(self.name, messages) 102 | 103 | def completion_v2(self, 
messages: List[dict]) -> AgentCompletion: 104 | return self.collection.completion_v2(self.name, messages) 105 | 106 | def completion_stream(self, messages: List[dict]) -> Iterable[object]: 107 | return self.collection.completion_stream(self.name, messages) 108 | 109 | def completion_stream_v2(self, messages: List[dict]) -> Iterable[object]: 110 | return self.collection.completion_stream_v2(self.name, messages) 111 | 112 | def add_files(self, file_paths: List[str], description: str, knowledge_base: str = None): 113 | """ 114 | Add a list of files to the agent for retrieval. 115 | 116 | :param file_paths: List of paths to the files to be added. 117 | """ 118 | self.collection.add_files(self.name, file_paths, description, knowledge_base) 119 | 120 | def add_file(self, file_path: str, description: str, knowledge_base: str = None): 121 | """ 122 | Add a file to the agent for retrieval. 123 | 124 | :param file_path: Path to the file to be added. 125 | """ 126 | self.collection.add_file(self.name, file_path, description, knowledge_base) 127 | 128 | def add_webpages( 129 | self, 130 | urls: List[str], 131 | description: str, 132 | knowledge_base: str = None, 133 | crawl_depth: int = 1, 134 | limit: int = None, 135 | filters: List[str] = None): 136 | """ 137 | Add a crawled URL to the agent for retrieval. 138 | 139 | :param urls: URLs of pages to be crawled and added. 140 | :param description: Description of the webpages. Used by agent to know when to do retrieval. 141 | :param knowledge_base: Name of an existing knowledge base to be used. Will create a default knowledge base if not given. 142 | :param crawl_depth: How deep to crawl from each base URL. 
0 = scrape given URLs only, -1 = default max 143 | :param limit: max count of pages to crawl 144 | :param filters: Include only URLs that match these regex patterns 145 | """ 146 | self.collection.add_webpages(self.name, urls, description, knowledge_base=knowledge_base, 147 | crawl_depth=crawl_depth, limit=limit, filters=filters) 148 | 149 | def add_webpage( 150 | self, 151 | url: str, 152 | description: str, 153 | knowledge_base: str = None, 154 | crawl_depth: int = 1, 155 | limit: int = None, 156 | filters: List[str] = None): 157 | """ 158 | Add a crawled URL to the agent for retrieval. 159 | 160 | :param url: URL of the page to be crawled and added. 161 | :param description: Description of the webpages. Used by agent to know when to do retrieval. 162 | :param knowledge_base: Name of an existing knowledge base to be used. Will create a default knowledge base if not given. 163 | :param crawl_depth: How deep to crawl from each base URL. 0 = scrape given URLs only, -1 = default max 164 | :param limit: max count of pages to crawl 165 | :param filters: Include only URLs that match these regex patterns 166 | """ 167 | self.collection.add_webpage(self.name, url, description, knowledge_base=knowledge_base, 168 | crawl_depth=crawl_depth, limit=limit, filters=filters) 169 | 170 | def add_database(self, database: str, tables: List[str], description: str): 171 | """ 172 | Add a database to the agent for retrieval. 173 | 174 | :param database: Name of the database to be added. 175 | :param tables: List of tables to be added. 176 | :param description: Description of the database tables. Used by the agent to know when to use SQL skill. 
177 | """ 178 | self.collection.add_database(self.name, database, tables, description) 179 | 180 | def __repr__(self): 181 | return f'{self.__class__.__name__}(name: {self.name})' 182 | 183 | def __eq__(self, other): 184 | if self.name != other.name: 185 | return False 186 | if self.model_name != other.model_name: 187 | return False 188 | if self.provider != other.provider: 189 | return False 190 | if self.skills != other.skills: 191 | return False 192 | if self.params != other.params: 193 | return False 194 | if self.created_at != other.created_at: 195 | return False 196 | return self.updated_at == other.updated_at 197 | 198 | @classmethod 199 | def from_json(cls, json: dict, collection: CollectionBase): 200 | return cls( 201 | json['name'], 202 | json['model_name'], 203 | [Skill.from_json(skill) for skill in json['skills']], 204 | json['params'], 205 | json['created_at'], 206 | json['updated_at'], 207 | json['provider'], 208 | collection 209 | ) 210 | 211 | 212 | class Agents(CollectionBase): 213 | """Collection for agents""" 214 | 215 | def __init__(self, project, api): 216 | self.api = api 217 | self.project = project 218 | 219 | self.knowledge_bases = project.knowledge_bases 220 | self.models = project.models 221 | self.skills = project.skills 222 | 223 | self.databases = project.server.databases 224 | self.ml_engines = project.server.ml_engines 225 | 226 | def list(self) -> List[Agent]: 227 | """ 228 | List available agents. 229 | 230 | :return: list of agents 231 | """ 232 | data = self.api.agents(self.project.name) 233 | return [Agent.from_json(agent, self) for agent in data] 234 | 235 | def get(self, name: str) -> Agent: 236 | """ 237 | Gets an agent by name. 
238 | 239 | :param name: Name of the agent 240 | 241 | :return: agent with given name 242 | """ 243 | data = self.api.agent(self.project.name, name) 244 | return Agent.from_json(data, self) 245 | 246 | def completion(self, name: str, messages: List[dict]) -> AgentCompletion: 247 | """ 248 | Queries the agent for a completion. 249 | 250 | :param name: Name of the agent 251 | :param messages: List of messages to be sent to the agent 252 | 253 | :return: completion from querying the agent 254 | """ 255 | data = self.api.agent_completion(self.project.name, name, messages) 256 | if 'context' in data['message']: 257 | return AgentCompletion(data['message']['content'], data['message'].get('context')) 258 | 259 | return AgentCompletion(data['message']['content']) 260 | 261 | def completion_v2(self, name: str, messages: List[dict]) -> AgentCompletion: 262 | """ 263 | Queries the agent for a completion. 264 | 265 | :param name: Name of the agent 266 | :param messages: List of messages to be sent to the agent 267 | 268 | :return: completion from querying the agent 269 | """ 270 | return self.api.agent_completion(self.project.name, name, messages) 271 | 272 | def completion_stream(self, name, messages: List[dict]) -> Iterable[object]: 273 | """ 274 | Queries the agent for a completion and streams the response as an iterable object. 275 | 276 | :param name: Name of the agent 277 | :param messageS: List of messages to be sent to the agent 278 | 279 | :return: iterable of completion chunks from querying the agent. 280 | """ 281 | return self.api.agent_completion_stream(self.project.name, name, messages) 282 | 283 | def completion_stream_v2(self, name, messages: List[dict]) -> Iterable[object]: 284 | """ 285 | Queries the agent for a completion and streams the response as an iterable object. 286 | 287 | :param name: Name of the agent 288 | :param messages: List of messages to be sent to the agent 289 | 290 | :return: iterable of completion chunks from querying the agent. 
291 | """ 292 | return self.api.agent_completion_stream_v2(self.project.name, name, messages) 293 | 294 | def _create_default_knowledge_base(self, agent: Agent, name: str) -> KnowledgeBase: 295 | # Make sure default ML engine for embeddings exists. 296 | try: 297 | _ = self.ml_engines.get('langchain_embedding') 298 | except AttributeError: 299 | _ = self.ml_engines.create('langchain_embedding', 'langchain_embedding') 300 | # Include API keys in embeddings. 301 | if agent.provider == "mindsdb": 302 | agent_model = self.models.get(agent.model_name) 303 | training_options = json.loads(agent_model.data.get('training_options', '{}')) 304 | training_options_using = training_options.get('using', {}) 305 | api_key_params = {k: v for k, v in training_options_using.items() if 'api_key' in k} 306 | kb = self.knowledge_bases.create(name, params=api_key_params) 307 | else: 308 | kb = self.knowledge_bases.create(name) 309 | # Wait for underlying embedding model to finish training. 310 | kb.model.wait_complete() 311 | return kb 312 | 313 | def add_files(self, name: str, file_paths: List[str], description: str, knowledge_base: str = None): 314 | """ 315 | Add a list of files to the agent for retrieval. 316 | 317 | :param name: Name of the agent 318 | :param file_paths: List of paths or URLs to the files to be added. 319 | :param description: Description of the file. Used by agent to know when to do retrieval 320 | :param knowledge_base: Name of an existing knowledge base to be used. Will create a default knowledge base if not given. 
321 | """ 322 | if not file_paths: 323 | return 324 | filename_no_extension = '' 325 | all_filenames = [] 326 | for file_path in file_paths: 327 | filename = file_path.split('/')[-1].lower() 328 | filename_no_extension = filename.split('.')[0] 329 | all_filenames.append(filename_no_extension) 330 | try: 331 | _ = self.api.get_file_metadata(filename_no_extension) 332 | except HTTPError as e: 333 | if e.response.status_code >= 400 and e.response.status_code != 404: 334 | raise e 335 | # upload file to mindsdb 336 | self.api.upload_file(filename, file_path) 337 | 338 | # Insert uploaded files into new knowledge base. 339 | agent = self.get(name) 340 | if knowledge_base is not None: 341 | kb = self.knowledge_bases.get(knowledge_base) 342 | else: 343 | kb_name = f'{name.lower()}_{filename_no_extension}_{uuid4().hex}_kb' 344 | kb = self._create_default_knowledge_base(agent, kb_name) 345 | 346 | # Insert the entire file. 347 | kb.insert_files(all_filenames) 348 | 349 | # Make sure skill name is unique. 350 | skill_name = f'{filename_no_extension}_retrieval_skill_{uuid4().hex}' 351 | retrieval_params = { 352 | 'source': kb.name, 353 | 'description': description, 354 | } 355 | file_retrieval_skill = self.skills.create(skill_name, 'retrieval', retrieval_params) 356 | agent.skills.append(file_retrieval_skill) 357 | self.update(agent.name, agent) 358 | 359 | def add_file(self, name: str, file_path: str, description: str, knowledge_base: str = None): 360 | """ 361 | Add a file to the agent for retrieval. 362 | 363 | :param name: Name of the agent 364 | :param file_path: Path to the file to be added, or name of existing file. 365 | :param description: Description of the file. Used by agent to know when to do retrieval 366 | :param knowledge_base: Name of an existing knowledge base to be used. Will create a default knowledge base if not given. 
367 | """ 368 | self.add_files(name, [file_path], description, knowledge_base) 369 | 370 | def add_webpages( 371 | self, 372 | name: str, 373 | urls: List[str], 374 | description: str, 375 | knowledge_base: str = None, 376 | crawl_depth: int = 1, 377 | limit: int = None, 378 | filters: List[str] = None 379 | ): 380 | """ 381 | Add a list of webpages to the agent for retrieval. 382 | 383 | :param name: Name of the agent 384 | :param urls: List of URLs of the webpages to be added. 385 | :param description: Description of the webpages. Used by agent to know when to do retrieval. 386 | :param knowledge_base: Name of an existing knowledge base to be used. Will create a default knowledge base if not given. 387 | :param crawl_depth: How deep to crawl from each base URL. 0 = scrape given URLs only 388 | :param limit: max count of pages to crawl 389 | :param filters: Include only URLs that match these regex patterns 390 | """ 391 | if not urls: 392 | return 393 | agent = self.get(name) 394 | for url in urls: 395 | # Validate URLs. 396 | _ = urlparse(url) 397 | if knowledge_base is not None: 398 | kb = self.knowledge_bases.get(knowledge_base) 399 | else: 400 | kb_name = f'{name.lower()}_web_{uuid4().hex}_kb' 401 | kb = self._create_default_knowledge_base(agent, kb_name) 402 | 403 | # Insert crawled webpage. 404 | kb.insert_webpages(urls, crawl_depth=crawl_depth, filters=filters, limit=limit) 405 | 406 | # Make sure skill name is unique. 
407 | skill_name = f'web_retrieval_skill_{uuid4().hex}' 408 | retrieval_params = { 409 | 'source': kb.name, 410 | 'description': description, 411 | } 412 | webpage_retrieval_skill = self.skills.create(skill_name, 'retrieval', retrieval_params) 413 | agent.skills.append(webpage_retrieval_skill) 414 | self.update(agent.name, agent) 415 | 416 | def add_webpage( 417 | self, 418 | name: str, 419 | url: str, 420 | description: str, 421 | knowledge_base: str = None, 422 | crawl_depth: int = 1, 423 | limit: int = None, 424 | filters: List[str] = None): 425 | """ 426 | Add a webpage to the agent for retrieval. 427 | 428 | :param name: Name of the agent 429 | :param file_path: URL of the webpage to be added, or name of existing webpage. 430 | :param description: Description of the webpage. Used by agent to know when to do retrieval. 431 | :param knowledge_base: Name of an existing knowledge base to be used. Will create a default knowledge base if not given. 432 | :param crawl_depth: How deep to crawl from each base URL. 0 = scrape given URLs only 433 | :param limit: max count of pages to crawl 434 | :param filters: Include only URLs that match these regex patterns 435 | """ 436 | self.add_webpages(name, [url], description, knowledge_base=knowledge_base, 437 | crawl_depth=crawl_depth, limit=limit, filters=filters) 438 | 439 | def add_database(self, name: str, database: str, tables: List[str], description: str): 440 | """ 441 | Add a database to the agent for retrieval. 442 | 443 | :param name: Name of the agent 444 | :param database: Name of the database to be added. 445 | :param tables: List of tables to be added. 446 | :param description: Description of the database. Used by agent to know when to do retrieval. 447 | """ 448 | # Make sure database exists. 449 | db = self.databases.get(database) 450 | # Make sure tables exist. 
451 | all_table_names = set([t.name for t in db.tables.list()]) 452 | for t in tables: 453 | if t not in all_table_names: 454 | raise ValueError(f'Table {t} does not exist in database {database}.') 455 | 456 | # Make sure skill name is unique. 457 | skill_name = f'{database}_sql_skill_{uuid4().hex}' 458 | sql_params = { 459 | 'database': database, 460 | 'tables': tables, 461 | 'description': description, 462 | } 463 | database_sql_skill = self.skills.create(skill_name, 'sql', sql_params) 464 | agent = self.get(name) 465 | 466 | if not agent.params: 467 | agent.params = {} 468 | if 'prompt_template' not in agent.params: 469 | # Set default prompt template. This is for langchain agent check. 470 | agent.params['prompt_template'] = 'using mindsdb sqltoolbox' 471 | 472 | agent.skills.append(database_sql_skill) 473 | self.update(agent.name, agent) 474 | 475 | def create( 476 | self, 477 | name: str, 478 | model: Union[Model, dict, str] = None, 479 | provider: str = None, 480 | skills: List[Union[Skill, str]] = None, 481 | params: dict = None, 482 | **kwargs) -> Agent: 483 | """ 484 | Create new agent and return it 485 | 486 | :param name: Name of the agent to be created 487 | :param model: Model to be used by the agent 488 | :param skills: List of skills to be used by the agent. Currently only 'sql' is supported. 489 | :param params: Parameters for the agent 490 | 491 | :return: created agent object 492 | """ 493 | skills = skills or [] 494 | skill_names = [] 495 | for skill in skills: 496 | if isinstance(skill, str): 497 | # Check if skill exists. 498 | # TODO what this line does? 499 | _ = self.skills.get(skill) 500 | skill_names.append(skill) 501 | continue 502 | # Create the skill if it doesn't exist. 
503 | _ = self.skills.create(skill.name, skill.type, skill.params) 504 | skill_names.append(skill.name) 505 | 506 | if params is None: 507 | params = {} 508 | params.update(kwargs) 509 | 510 | if 'prompt_template' not in params: 511 | params['prompt_template'] = _DEFAULT_LLM_PROMPT 512 | 513 | if model is None: 514 | model = _DEFAULT_LLM_MODEL 515 | elif isinstance(model, Model): 516 | model = model.name 517 | provider = 'mindsdb' 518 | 519 | data = self.api.create_agent(self.project.name, name, model, provider, skill_names, params) 520 | return Agent.from_json(data, self) 521 | 522 | def update(self, name: str, updated_agent: Agent): 523 | """ 524 | Update an agent by name. 525 | 526 | :param name: Name of the agent to be updated 527 | :param updated_agent: Agent with updated fields 528 | 529 | :return: updated agent object 530 | """ 531 | updated_skills = set() 532 | for skill in updated_agent.skills: 533 | if isinstance(skill, str): 534 | # Skill must exist. 535 | _ = self.skills.get(skill) 536 | updated_skills.add(skill) 537 | continue 538 | try: 539 | # Create the skill if it doesn't exist. 
540 | _ = self.skills.get(skill.name) 541 | except HTTPError as e: 542 | if e.response.status_code != 404: 543 | raise e 544 | # Doesn't exist 545 | _ = self.skills.create(skill.name, skill.type, skill.params) 546 | updated_skills.add(skill.name) 547 | 548 | existing_agent = self.api.agent(self.project.name, name) 549 | existing_skills = set([s['name'] for s in existing_agent['skills']]) 550 | skills_to_add = updated_skills.difference(existing_skills) 551 | skills_to_remove = existing_skills.difference(updated_skills) 552 | data = self.api.update_agent( 553 | self.project.name, 554 | name, 555 | updated_agent.name, 556 | updated_agent.provider, 557 | updated_agent.model_name, 558 | list(skills_to_add), 559 | list(skills_to_remove), 560 | updated_agent.params 561 | ) 562 | return Agent.from_json(data, self) 563 | 564 | def drop(self, name: str): 565 | """ 566 | Drop an agent by name. 567 | 568 | :param name: Name of the agent to be dropped 569 | """ 570 | _ = self.api.delete_agent(self.project.name, name) 571 | -------------------------------------------------------------------------------- /mindsdb_sdk/connect.py: -------------------------------------------------------------------------------- 1 | from mindsdb_sdk.server import Server 2 | 3 | from mindsdb_sdk.connectors.rest_api import RestAPI 4 | 5 | DEFAULT_LOCAL_API_URL = 'http://127.0.0.1:47334' 6 | DEFAULT_CLOUD_API_URL = 'https://cloud.mindsdb.com' 7 | 8 | 9 | def connect( 10 | url: str = None, 11 | login: str = None, 12 | password: str = None, 13 | api_key: str = None, 14 | is_managed: bool = False, 15 | cookies=None, 16 | headers=None) -> Server: 17 | """ 18 | Create connection to mindsdb server 19 | 20 | :param url: url to mindsdb server 21 | :param login: user login, for cloud version it contents email 22 | :param password: user password to login (for cloud version) 23 | :param api_key: API key to authenticate (for cloud version) 24 | :param is_managed: whether or not the URL points to a managed instance 
def connect(
        url: str = None,
        login: str = None,
        password: str = None,
        api_key: str = None,
        is_managed: bool = False,
        cookies=None,
        headers=None) -> Server:
    """
    Create connection to mindsdb server

    :param url: url to mindsdb server
    :param login: user login, for cloud version it contains email
    :param password: user password to login (for cloud version)
    :param api_key: API key to authenticate (for cloud version)
    :param is_managed: whether or not the URL points to a managed instance
    :param cookies: additional cookies to send with the connection, optional
    :param headers: additional headers to send with the connection, optional
    :return: Server object

    Examples
    --------

    >>> import mindsdb_sdk

    Connect to local server

    >>> con = mindsdb_sdk.connect()
    >>> con = mindsdb_sdk.connect('http://127.0.0.1:47334')

    Connect to cloud server

    >>> con = mindsdb_sdk.connect('https://cloud.mindsdb.com', api_key='-')

    Connect to MindsDB pro

    >>> con = mindsdb_sdk.connect('http://', login='a@b.com', password='-', is_managed=True)

    """
    # Fix: corrected 'addtional' -> 'additional' and 'contents' -> 'contains' in the docstring.
    if url is None:
        if login is not None:
            # Credentials given but no url: default to the cloud server.
            url = DEFAULT_CLOUD_API_URL
        else:
            # No credentials: assume a local installation.
            url = DEFAULT_LOCAL_API_URL

    api = RestAPI(url, login, password, api_key, is_managed,
                  cookies=cookies, headers=headers)

    return Server(api)
def _try_relogin(fnc):
    """Decorator: on a 401 response, re-login once and retry the wrapped call.

    Any other HTTP error (or a failed re-login) re-raises the original error.
    """
    @wraps(fnc)
    def wrapper(self, *args, **kwargs):
        try:
            return fnc(self, *args, **kwargs)
        except requests.HTTPError as e:
            if e.response.status_code != 401:
                raise e

            # Session likely expired: try to re-login before retrying.
            try:
                self.login()
            except requests.HTTPError:
                raise e
            # call once more
            return fnc(self, *args, **kwargs)
    return wrapper


def _raise_for_status(response):
    """Raise HTTPError for 4xx/5xx responses, including the response text in the message."""
    if 400 <= response.status_code < 600:
        raise requests.HTTPError(f'{response.reason}: {response.text}', response=response)


class RestAPI:
    """Thin REST client for the MindsDB HTTP API, holding one requests session."""

    def __init__(self, url=None, login=None, password=None, api_key=None, is_managed=False,
                 cookies=None, headers=None):
        """
        :param url: base url of the mindsdb server
        :param login: user login (email for cloud)
        :param password: user password
        :param api_key: API key; when given it is used instead of login/password
        :param is_managed: whether the url points to a managed instance
        :param cookies: additional cookies sent with every request, optional
        :param headers: additional headers sent with every request, optional
        """
        self.url = url
        self.username = login
        self.password = password
        self.api_key = api_key
        self.is_managed = is_managed
        self.session = requests.Session()

        if cookies is not None:
            self.session.cookies.update(cookies)

        self.session.headers['User-Agent'] = f'python-sdk/{__about__.__version__}'
        if headers is not None:
            self.session.headers.update(headers)
        if self.api_key is not None:
            # Authenticate with API key instead of logging in, if present.
            self.session.headers['X-Api-Key'] = self.api_key
            return
        if login is not None:
            self.login()

    def login(self):
        """Authenticate the session using login/password.

        Uses the cloud endpoint unless is_managed is set, falling back to the
        managed-instance endpoint when the cloud one is absent (404/405).
        """
        managed_endpoint = '/api/login'
        cloud_endpoint = '/cloud/login'

        # Fix: local variable renamed from `json` to `payload` to stop
        # shadowing the imported `json` module.
        if self.is_managed:
            payload = {'password': self.password, 'username': self.username}
            url = self.url + managed_endpoint
        else:
            payload = {'password': self.password, 'email': self.username}
            url = self.url + cloud_endpoint
        r = self.session.post(url, json=payload)

        # fallback when using a managed instance with is_managed=False
        if r.status_code in (405, 404) and self.is_managed is False:
            # try managed instance login
            payload = {'password': self.password, 'username': self.username}
            url = self.url + managed_endpoint
            r = self.session.post(url, json=payload)

        _raise_for_status(r)

    @_try_relogin
    def sql_query(self, sql, database=None, lowercase_columns=False):
        """Execute an SQL query and return a DataFrame, or None for non-table results.

        :param sql: query text
        :param database: default database context; 'mindsdb' when not set
        :param lowercase_columns: lowercase the returned column names
        :raises RuntimeError: if the server reports a query error
        """
        if database is None:
            # it means the database is included in query
            database = 'mindsdb'
        url = self.url + '/api/sql/query'
        r = self.session.post(url, json={
            'query': sql,
            'context': {'db': database}
        })
        _raise_for_status(r)

        data = r.json()
        if data['type'] == 'table':
            columns = data['column_names']
            if lowercase_columns:
                columns = [i.lower() for i in columns]
            return pd.DataFrame(data['data'], columns=columns)
        if data['type'] == 'error':
            raise RuntimeError(data['error_message'])
        return None

    @_try_relogin
    def projects(self):
        """Return available projects as a DataFrame."""
        # TODO not used yet

        r = self.session.get(self.url + '/api/projects')
        _raise_for_status(r)

        return pd.DataFrame(r.json())
None: 124 | params = {} 125 | url = self.url + f'/api/projects/{project}/models/{model}/predict' 126 | r = self.session.post(url, json={ 127 | 'data': data, 128 | 'params': params 129 | }) 130 | _raise_for_status(r) 131 | 132 | return pd.DataFrame(r.json()) 133 | 134 | @_try_relogin 135 | def objects_tree(self, item=''): 136 | r = self.session.get(self.url + f'/api/tree/{item}') 137 | _raise_for_status(r) 138 | 139 | return pd.DataFrame(r.json()) 140 | 141 | @staticmethod 142 | def read_file_as_bytes(file_path: str): 143 | """ 144 | Read and return content of a file in bytes, given its path. 145 | :param file_path: Path of the file to read. 146 | :return: File content in bytes. 147 | """ 148 | try: 149 | with open(file_path, 'rb+') as file: 150 | return file.read() 151 | except FileNotFoundError: 152 | raise Exception(f'File {file_path} does not exist.') 153 | except PermissionError: 154 | raise Exception(f'Permission denied when reading file {file_path}.') 155 | except Exception as e: 156 | raise Exception(f'Unknown error occurred when reading file {file_path} - {str(e)}') 157 | 158 | @staticmethod 159 | def read_dataframe_as_csv(data: pd.DataFrame): 160 | """ 161 | Read and return content of a DataFrame as CSV in bytes. 162 | :param data: DataFrame to read. 163 | :return: DataFrame content as CSV in bytes. 164 | """ 165 | fd = io.BytesIO() 166 | data.to_csv(fd, index=False) 167 | fd.seek(0) 168 | return fd.read() 169 | 170 | @staticmethod 171 | def read_file_as_webpage(url: str): 172 | """ 173 | Read and return content of a file in bytes, given its URL. 174 | :param file_path: URL of the file to read. 175 | :return: File content in bytes. 176 | """ 177 | data = requests.get(url) 178 | return data.content 179 | 180 | def upload_data(self, file_name: str, data: bytes): 181 | """ 182 | Upload binary data to MindsDB. 183 | :param file_name: Name of the file. 184 | :param data: Binary data to upload. 
185 | """ 186 | # remove suffix from file if present 187 | name = file_name.split('.')[0] 188 | 189 | url = self.url + f'/api/files/{name}' 190 | r = self.session.put( 191 | url, 192 | data={ 193 | 'original_file_name':file_name, 194 | 'name':name, 195 | 'source_type':'file', 196 | }, 197 | files={ 198 | 'file': (file_name, data) 199 | 200 | } 201 | ) 202 | _raise_for_status(r) 203 | 204 | @_try_relogin 205 | def upload_file(self, name: str, data: Union[pd.DataFrame, str]): 206 | """ 207 | Upload a file or a DataFrame to MindsDB. 208 | :param name: Name of the file or DataFrame. 209 | :param data: DataFrame data or file path. 210 | """ 211 | if isinstance(data, pd.DataFrame): 212 | data_in_bytes = self.read_dataframe_as_csv(data) 213 | elif validators.url(data): 214 | data_in_bytes = self.read_file_as_webpage(data) 215 | else: 216 | data_in_bytes = self.read_file_as_bytes(data) 217 | 218 | self.upload_data(name, data_in_bytes) 219 | 220 | @_try_relogin 221 | def get_file_metadata(self, name: str) -> dict: 222 | # No endpoint currently to get single file. 223 | url = self.url + f'/api/files/' 224 | r = self.session.get(url) 225 | _raise_for_status(r) 226 | all_file_metadata = r.json() 227 | for metadata in all_file_metadata: 228 | if metadata.get('name', None) == name: 229 | return metadata 230 | r.status_code = 404 231 | raise requests.HTTPError(f'Not found: No file named {name} found', response=r) 232 | 233 | @_try_relogin 234 | def upload_byom(self, name: str, code: str, requirements: str): 235 | 236 | url = self.url + f'/api/handlers/byom/{name}' 237 | r = self.session.put( 238 | url, 239 | files={ 240 | 'code': code, 241 | 'modules': requirements, 242 | } 243 | ) 244 | _raise_for_status(r) 245 | 246 | def status(self) -> dict: 247 | 248 | r = self.session.get(self.url + '/api/status') 249 | _raise_for_status(r) 250 | 251 | return r.json() 252 | 253 | # TODO: Different endpoints should be refactored into their own classes. 254 | # 255 | # Agents operations. 
256 | @_try_relogin 257 | def agents(self, project: str): 258 | r = self.session.get(self.url + f'/api/projects/{project}/agents') 259 | _raise_for_status(r) 260 | 261 | return r.json() 262 | 263 | @_try_relogin 264 | def agent(self, project: str, name: str): 265 | r = self.session.get(self.url + f'/api/projects/{project}/agents/{name}') 266 | _raise_for_status(r) 267 | 268 | return r.json() 269 | 270 | @_try_relogin 271 | def agent_completion(self, project: str, name: str, messages: List[dict]): 272 | url = self.url + f'/api/projects/{project}/agents/{name}/completions' 273 | r = self.session.post( 274 | url, 275 | json={ 276 | 'messages': messages 277 | } 278 | ) 279 | _raise_for_status(r) 280 | 281 | return r.json() 282 | 283 | @_try_relogin 284 | def agent_completion_stream(self, project: str, name: str, messages: List[dict]): 285 | url = self.url + f'/api/projects/{project}/agents/{name}/completions/stream' 286 | stream = self.session.post(url, json={'messages': messages}, stream=True) 287 | client = SSEClient(stream) 288 | for chunk in client.events(): 289 | # Stream objects loaded from SSE events 'data' param. 
290 | yield json.loads(chunk.data) 291 | 292 | @_try_relogin 293 | def agent_completion_stream_v2(self, project: str, name: str, messages: List[dict]): 294 | url = self.url + f'/api/projects/{project}/agents/{name}/completions/stream' 295 | response = self.session.post(url, json={'messages': messages}, stream=True) 296 | 297 | # Check for HTTP errors before processing the stream 298 | response.raise_for_status() 299 | 300 | client = SSEClient(response) 301 | 302 | try: 303 | for chunk in client.events(): 304 | yield chunk # Stream SSE events 305 | except Exception as e: 306 | yield e 307 | 308 | @_try_relogin 309 | def create_agent(self, project: str, name: str, model: str = None, provider: str = None, skills: List[str] = None, params: dict = None): 310 | url = self.url + f'/api/projects/{project}/agents' 311 | r = self.session.post( 312 | url, 313 | json={ 314 | 'agent': { 315 | 'name': name, 316 | 'model_name': model, 317 | 'provider': provider, 318 | 'skills': skills, 319 | 'params': params 320 | } 321 | } 322 | ) 323 | _raise_for_status(r) 324 | return r.json() 325 | 326 | @_try_relogin 327 | def update_agent( 328 | self, 329 | project: str, 330 | name: str, 331 | updated_name: str, 332 | updated_provider: str, 333 | updated_model: str, 334 | skills_to_add: List[str], 335 | skills_to_remove: List[str], 336 | updated_params: dict 337 | ): 338 | url = self.url + f'/api/projects/{project}/agents/{name}' 339 | r = self.session.put( 340 | url, 341 | json={ 342 | 'agent': { 343 | 'name': updated_name, 344 | 'model_name': updated_model, 345 | 'provider': updated_provider, 346 | 'skills_to_add': skills_to_add, 347 | 'skills_to_remove': skills_to_remove, 348 | 'params': updated_params 349 | } 350 | } 351 | ) 352 | _raise_for_status(r) 353 | return r.json() 354 | 355 | @_try_relogin 356 | def delete_agent(self, project: str, name: str): 357 | url = self.url + f'/api/projects/{project}/agents/{name}' 358 | r = self.session.delete(url) 359 | _raise_for_status(r) 360 | 361 
| # Skills operations. 362 | @_try_relogin 363 | def skills(self, project: str): 364 | r = self.session.get(self.url + f'/api/projects/{project}/skills') 365 | _raise_for_status(r) 366 | 367 | return r.json() 368 | 369 | @_try_relogin 370 | def skill(self, project: str, name: str): 371 | r = self.session.get(self.url + f'/api/projects/{project}/skills/{name}') 372 | _raise_for_status(r) 373 | 374 | return r.json() 375 | 376 | @_try_relogin 377 | def create_skill(self, project: str, name: str, type: str, params: dict): 378 | url = self.url + f'/api/projects/{project}/skills' 379 | r = self.session.post( 380 | url, 381 | json={ 382 | 'skill': { 383 | 'name': name, 384 | 'type': type, 385 | 'params': params 386 | } 387 | } 388 | ) 389 | _raise_for_status(r) 390 | 391 | return r.json() 392 | 393 | @_try_relogin 394 | def update_skill( 395 | self, 396 | project: str, 397 | name: str, 398 | updated_name: str, 399 | updated_type: str, 400 | updated_params: dict 401 | ): 402 | url = self.url + f'/api/projects/{project}/skills/{name}' 403 | r = self.session.put( 404 | url, 405 | json={ 406 | 'skill': { 407 | 'name': updated_name, 408 | 'type': updated_type, 409 | 'params': updated_params 410 | } 411 | } 412 | ) 413 | _raise_for_status(r) 414 | return r.json() 415 | 416 | @_try_relogin 417 | def delete_skill(self, project: str, name: str): 418 | url = self.url + f'/api/projects/{project}/skills/{name}' 419 | r = self.session.delete(url) 420 | _raise_for_status(r) 421 | 422 | # Knowledge Base operations. 
423 | @_try_relogin 424 | def insert_into_knowledge_base(self, project: str, knowledge_base_name: str, data): 425 | r = self.session.put( 426 | self.url + f'/api/projects/{project}/knowledge_bases/{knowledge_base_name}', 427 | json={ 428 | 'knowledge_base': data 429 | } 430 | ) 431 | _raise_for_status(r) 432 | 433 | return r.json() 434 | 435 | @_try_relogin 436 | def list_knowledge_bases(self, project: str): 437 | r = self.session.get(self.url + f'/api/projects/{project}/knowledge_bases') 438 | _raise_for_status(r) 439 | return r.json() 440 | 441 | @_try_relogin 442 | def get_knowledge_base(self, project: str, knowledge_base_name): 443 | r = self.session.get(self.url + f'/api/projects/{project}/knowledge_bases/{knowledge_base_name}') 444 | _raise_for_status(r) 445 | return r.json() 446 | 447 | @_try_relogin 448 | def delete_knowledge_base(self, project: str, knowledge_base_name): 449 | r = self.session.delete(self.url + f'/api/projects/{project}/knowledge_bases/{knowledge_base_name}') 450 | _raise_for_status(r) 451 | 452 | @_try_relogin 453 | def create_knowledge_base(self, project: str, data): 454 | r = self.session.post( 455 | self.url + f'/api/projects/{project}/knowledge_bases', 456 | json={ 457 | 'knowledge_base': data 458 | } 459 | ) 460 | _raise_for_status(r) 461 | 462 | return r.json() 463 | 464 | def knowledge_base_completion(self, project: str, knowledge_base_name, payload): 465 | r = self.session.post( 466 | self.url + f'/api/projects/{project}/knowledge_bases/{knowledge_base_name}/completions', 467 | json=payload 468 | ) 469 | _raise_for_status(r) 470 | return r.json() 471 | -------------------------------------------------------------------------------- /mindsdb_sdk/databases.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Union 2 | 3 | from mindsdb_sql_parser.ast.mindsdb import CreateDatabase 4 | from mindsdb_sql_parser.ast import DropDatabase, Identifier 5 | 6 | from 
class Database:
    """
    Allows to work with database (datasource): to use tables and make raw queries

    To run native query
    At this moment query is just saved in Query object and not executed

    >>> query = database.query('select * from table1') # returns Query

    This command sends request to server to execute query and return dataframe

    >>> df = query.fetch()

    Has list of tables in .tables attribute.

    """

    def __init__(self, server, name: str, engine: str = None, params: Dict = None):
        self.server = server
        self.name = name
        self.engine = engine
        self.api = server.api
        self.params = params

        self.tables = Tables(self, self.api)

        # old api — aliases kept for backward compatibility
        self.get_table = self.tables.get
        self.list_tables = self.tables.list
        self.create_table = self.tables.create
        self.drop_table = self.tables.drop

    def __repr__(self):
        return f'{self.__class__.__name__}({self.name})'

    def query(self, sql: str) -> Query:
        """
        Make raw query to integration

        :param sql: sql of the query
        :return: Query object
        """
        # Fix: removed stale ':param database:' doc entry — this method takes no
        # such parameter; the query always targets this database.
        return Query(self.api, sql, database=self.name)


class Databases(CollectionBase):
    """
    Databases
    ----------

    >>> databases.list()
    >>> db = databases[0] # Database type object

    # create

    >>> db = databases.create('example_db',
    ...                       engine='postgres',
    ...                       connection_args={'host': ''})

    # drop database

    >>> databases.drop('example_db')

    # get existing

    >>> db = databases.get('example_db')

    """

    def __init__(self, api):
        self.api = api

    def _list_databases(self) -> Dict[str, Database]:
        # NOTE(review): `self` (a Databases collection) is passed as the `server`
        # argument of Database; it works because both expose `.api` — confirm
        # this is intentional.
        data = self.api.sql_query(
            "select NAME, ENGINE, CONNECTION_DATA from information_schema.databases where TYPE='data'"
        )
        name_to_db = {}
        for _, row in data.iterrows():
            name_to_db[row["NAME"]] = Database(
                self, row["NAME"], engine=row["ENGINE"], params=row["CONNECTION_DATA"]
            )
        return name_to_db

    def list(self) -> List[Database]:
        """
        Show list of integrations (databases) on server

        :return: list of Database objects
        """
        databases = self._list_databases()
        return list(databases.values())

    def create(
        self, name: str, engine: Union[str, Handler], connection_args: Dict
    ) -> Database:
        """
        Create new integration and return it

        :param name: Identifier for the integration to be created
        :param engine: Engine to be selected depending on the database connection.
        :param connection_args: {"key": "value"} object with the connection parameters specific for each engine
        :return: created Database object
        """
        if isinstance(engine, Handler):
            engine = engine.name

        ast_query = CreateDatabase(
            name=Identifier(name),
            engine=engine,
            parameters=connection_args,
        )
        self.api.sql_query(ast_query.to_string())
        return Database(self, name, engine=engine, params=connection_args)

    def drop(self, name: str):
        """
        Delete integration

        :param name: name of integration
        """
        ast_query = DropDatabase(name=Identifier(name))
        self.api.sql_query(ast_query.to_string())

    def get(self, name: str) -> Database:
        """
        Get integration by name

        :param name: name of integration
        :raises AttributeError: if the database does not exist
        :return: Database object
        """
        databases = self._list_databases()
        if name not in databases:
            raise AttributeError("Database doesn't exist")
        return databases[name]


@dataclass(init=False)
class Handler:
    """
    Metadata describing an installed handler.

    :meta private:
    """
    name: str
    title: str
    version: str
    description: str
    connection_args: dict
    import_success: bool
    import_error: str

    def __init__(self, **kwargs):
        # Keep only declared fields; silently ignore unknown columns
        # returned by the server.
        names = set([f.name for f in dataclasses.fields(self)])
        for k, v in kwargs.items():
            if k in names:
                setattr(self, k, v)


class Handlers(CollectionBase):
    """
    :meta private:
    """

    def __init__(self, api, type):
        self.api = api
        self.type = type

    def list(self) -> List[Handler]:
        """
        Returns list of handlers on server depending on type
        :return: list of handlers
        """

        ast_query = Show(
            category='HANDLERS',
            where=BinaryOperation(
                op='=',
                args=[
                    Identifier('type'),
                    Constant(self.type)
                ]
            )
        )

        df = self.api.sql_query(ast_query.to_string())
        # columns to lower case
        cols_map = {i: i.lower() for i in df.columns}
        df = df.rename(columns=cols_map)

        return [
            Handler(**item)
            for item in df.to_dict('records')
        ]

    def get(self, name: str) -> Handler:
        """
        Get handler by name

        :param name: name of the handler (matched case-insensitively)
        :return: handler object
        :raises AttributeError: if the handler does not exist
        """
        name = name.lower()
        for item in self.list():
            if item.name == name:
                return item
        raise AttributeError(f"Handler doesn't exist: {name}")


class MLHandlers(Handlers):
    """
    **ML handlers collection**

    Examples of usage:

    Get list

    >>> con.ml_handlers.list()

    Get

    >>> openai_handler = con.ml_handlers.openai
    >>> openai_handler = con.ml_handlers.get('openai')

    """

    ...


class DataHandlers(Handlers):
    """
    **DATA handlers collection**

    Examples of usage:

    Get list

    >>> con.data_handlers.list()

    Get

    >>> pg_handler = con.data_handlers.postgres
    >>> pg_handler = con.data_handlers.get('postgres')

    """

    ...
class Job:
    """A scheduled job in a MindsDB project: one or more queries run on a schedule."""

    def __init__(self, project, name, data=None, create_callback=None):
        self.project = project
        self.name = name
        self.data = data

        self.query_str = None
        if data is not None:
            self._update(data)
        self._queries = []
        # Set only while the job is pending creation (context-manager mode).
        self._create_callback = create_callback

    def _update(self, data):
        # Refresh cached fields from a server-side job record.
        # self.name = data['name']
        self.query_str = data['query']
        self.start_at = data['start_at']
        self.end_at = data['end_at']
        self.next_run_at = data['next_run_at']
        self.schedule_str = data['schedule_str']

    def __repr__(self):
        return f"{self.__class__.__name__}({self.name}, query='{self.query_str}')"

    def __enter__(self):
        # Context-manager mode is only valid for jobs not yet created on the server.
        if self._create_callback is None:
            raise ValueError("The job is already created and can't be used to create context."
                             " To be able to use context: create job without 'query_str' parameter: "
                             "\n>>> with con.jobs.create('j1') as job:"
                             "\n>>> job.add_query(...)")
        set_saving(f'job-{self.name}')

        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Fix: parameters renamed from (type, value, traceback) to avoid shadowing
        # builtins; __exit__ is invoked positionally, so this is safe.
        set_saving(None)
        if exc_type is None:
            if len(self._queries) == 0:
                raise ValueError('No queries were added to job')

            query_str = '; '.join(self._queries)

            self._create_callback(query_str)

            self.refresh()

    def refresh(self):
        """
        Retrieve job data from mindsdb server
        """
        job = self.project.get_job(self.name)
        self._update(job.data)

    def add_query(self, query: Union[Query, str]):
        """
        Add a query to job. Method is used in context of the job

        >>> with con.jobs.create('j1') as job:
        >>>     job.add_query(table1.insert(table2))

        :param query: string or Query object. Query.database should be empty or the same as job's project
        :raises ValueError: if the query targets another database or is not a Query/str
        """
        if isinstance(query, Query):

            if query.database is not None and query.database != self.project.name:
                # we can't execute this query in jobs project
                raise ValueError(f"Wrong query database: {query.database}. You could try to use sql string instead")

            query = query.sql
        elif not isinstance(query, str):
            # Fix: error message previously read 'Unable to use add this object'.
            raise ValueError(f'Unable to use this object as a query: {query}. Try to use sql string instead')
        self._queries.append(query)

    def get_history(self) -> pd.DataFrame:
        """
        Get history of job execution

        :return: dataframe with job executions
        """
        ast_query = Select(
            targets=[Star()],
            from_table=Identifier('jobs_history'),
            where=dict_to_binary_op({
                'name': self.name
            })
        )
        return self.project.api.sql_query(ast_query.to_string(), database=self.project.name)


class Jobs(CollectionBase):
    """Collection of jobs in a project."""

    def __init__(self, project, api):
        self.project = project
        self.api = api

    def _list(self, name: str = None) -> List[Job]:
        # Query the project's `jobs` table, optionally filtered by name.
        ast_query = Select(targets=[Star()], from_table=Identifier('jobs'))

        if name is not None:
            ast_query.where = dict_to_binary_op({'name': name})

        df = self.api.sql_query(ast_query.to_string(), database=self.project.name)

        # columns to lower case
        cols_map = {i: i.lower() for i in df.columns}
        df = df.rename(columns=cols_map)

        return [
            Job(self.project, item.pop('name'), item)
            for item in df.to_dict('records')
        ]

    def list(self) -> List[Job]:
        """
        Show list of jobs in project

        :return: list of Job objects
        """

        return self._list()

    def get(self, name: str) -> Job:
        """
        Get job by name from project

        :param name: name of the job
        :return: Job object
        :raises AttributeError: if the job doesn't exist
        :raises RuntimeError: if several jobs share the name
        """

        jobs = self._list(name)
        if len(jobs) == 1:
            return jobs[0]
        elif len(jobs) == 0:
            raise AttributeError("Job doesn't exist")
        else:
            raise RuntimeError("Several jobs with the same name")

    def create(
        self,
        name: str,
        query_str: str = None,
        start_at: dt.datetime = None,
        end_at: dt.datetime = None,
        repeat_str: str = None,
        repeat_min: int = None,
    ) -> Union[Job, None]:
        """
        Create new job in project and return it.

        If it is not possible (job executed and not accessible anymore):
        return None

        Usage options:

        Option 1: to use string query
        All job tasks could be passed as string with sql queries. Job is created immediately

        >>> job = con.jobs.create('j1', query_str='retrain m1; show models', repeat_min=1):

        Option 2: to use 'with' block.
        It allows to pass sdk commands to job tasks.
        Not all sdk commands could be accepted here,
        only those which are converted in to sql in sdk and sent to /query endpoint
        Adding query sql string is accepted as well
        Job will be created after exit from 'with' block

        >>> with con.jobs.create('j1', repeat_min=1) as job:
        >>>     job.add_query(table1.insert(table2))
        >>>     job.add_query('retrain m1') # using string

        More info about jobs: https://docs.mindsdb.com/sql/create/jobs

        :param name: name of the job
        :param query_str: str, job's query (or list of queries with ';' delimiter) which the job has to execute
        :param start_at: datetime, first start of job,
        :param end_at: datetime, when the job has to be stopped,
        :param repeat_str: str, optional, how to repeat job (e.g. '1 hour', '2 weeks', '3 min')
        :param repeat_min: int, optional, period to repeat the job in minutes (overrides repeat_str)
        :return: Job object or None
        """

        if start_at is not None:
            start_str = start_at.strftime("%Y-%m-%d %H:%M:%S")
        else:
            start_str = None

        if end_at is not None:
            end_str = end_at.strftime("%Y-%m-%d %H:%M:%S")
        else:
            end_str = None

        if repeat_min is not None:
            repeat_str = f'{repeat_min} minutes'

        def _create_callback(query):
            # Issue the CREATE JOB statement against the project database.
            ast_query = CreateJob(
                name=Identifier(name),
                query_str=query,
                start_str=start_str,
                end_str=end_str,
                repeat_str=repeat_str
            )

            self.api.sql_query(ast_query.to_string(), database=self.project.name)

        if query_str is None:
            # allow to create context with job
            job = Job(self.project, name, create_callback=_create_callback)
            return job
        else:
            # create it
            _create_callback(query_str)

            # the job may have already run and been removed if it is not repeatable
            jobs = self._list(name)
            if len(jobs) == 1:
                return jobs[0]

    def drop(self, name: str):
        """
        Drop job from project

        :param name: name of the job
        """
        ast_query = DropJob(Identifier(name))

        self.api.sql_query(ast_query.to_string(), database=self.project.name)
MAX_INSERT_SIZE = 1000


def split_data(data: Union[pd.DataFrame, list], partition_size: int) -> Iterable:
    """
    Yield consecutive chunks of *data*, each at most ``partition_size`` items long.

    Works for anything sliceable by integer ranges (list, DataFrame).
    Yields nothing for empty input.

    :param data: sliceable collection to be chunked
    :param partition_size: maximum number of items per chunk
    """
    for start in range(0, len(data), partition_size):
        yield data[start: start + partition_size]
    def __repr__(self):
        # e.g. KnowledgeBase(my_project.my_kb)
        return f'{self.__class__.__name__}({self.project.name}.{self.name})'

    def find(self, query: str, limit: int = 100):
        """
        Query data from knowledge base.
        Knowledge base should return the most relevant results for the query.

        >>> # query knowledge base
        >>> query = my_kb.find('dogs')
        >>> # fetch dataframe to client
        >>> print(query.fetch())

        :param query: text query
        :param limit: count of rows in result, default 100
        :return: Query object (a deep copy of this knowledge base with the search applied)
        """
        # Work on a copy so this object keeps its own query state unchanged.
        kb = copy.deepcopy(self)
        kb._query = query
        kb._limit = limit
        kb._update_query()

        return kb

    def _update_query(self):
        # Rebuild self.sql from the current search state:
        #   SELECT * FROM <project>.<kb> [WHERE content = '<query>'] [LIMIT <n>]
        ast_query = Select(
            targets=[Star()],
            from_table=self.table_name
        )
        if self._query is not None:
            # relevance search is expressed as an equality filter on the
            # reserved `content` column
            ast_query.where = BinaryOperation(op='=', args=[
                Identifier('content'),
                Constant(self._query)
            ])

        if self._limit is not None:
            ast_query.limit = Constant(self._limit)
        self.sql = ast_query.to_string()

    def insert_files(self, file_paths: List[str], params: dict = None):
        """
        Insert data from files to knowledge base.

        :param file_paths: list of paths to files to be inserted
        :param params: runtime parameters for the knowledge base, optional
        """
        data = {'files': file_paths}
        if params:
            data['params'] = params

        self.api.insert_into_knowledge_base(
            self.project.name,
            self.name,
            data=data
        )
0 = scrape given URLs only 150 | :param filters: Include only URLs that match these regex patterns 151 | :param limit: max count of pages to crawl 152 | :param params: Runtime parameters for KB 153 | """ 154 | data={ 155 | 'urls': urls, 156 | 'crawl_depth': crawl_depth, 157 | 'limit': limit, 158 | 'filters': [] if filters is None else filters, 159 | } 160 | if params: 161 | data['params'] = params 162 | self.api.insert_into_knowledge_base( 163 | self.project.name, 164 | self.name, 165 | data=data 166 | ) 167 | 168 | def insert(self, data: Union[pd.DataFrame, Query, dict, list], params: dict = None): 169 | """ 170 | Insert data to knowledge base 171 | 172 | >>> # using dataframe 173 | >>> my_kb.insert(pd.read_csv('house_sales.csv')) 174 | >>> # using dict 175 | >>> my_kb.insert({'type': 'house', 'date': '2020-02-02'}) 176 | 177 | If id is already exists in knowledge base: 178 | - it will be replaced 179 | - `id` column can be defined by id_column param, see create knowledge base 180 | 181 | :param data: Dataframe or Query object or dict. 
182 | :param params: Runtime parameters for KB 183 | """ 184 | 185 | if isinstance(data, Query): 186 | # for back compatibility 187 | return self.insert_query(data) 188 | 189 | if isinstance(data, dict): 190 | data = [data] 191 | elif isinstance(data, pd.DataFrame): 192 | for df in split_data(data, MAX_INSERT_SIZE): 193 | data = df.to_dict('records') 194 | self.insert(data, params=params) 195 | return 196 | elif not isinstance(data, list): 197 | raise ValueError("Unknown data type, accepted types: DataFrame, Query, dict, list") 198 | 199 | # chunking a big input data 200 | if len(data) > MAX_INSERT_SIZE: 201 | for chunk in split_data(data, MAX_INSERT_SIZE): 202 | self.insert(chunk, params=params) 203 | return 204 | 205 | data = {'rows': data} 206 | if params: 207 | data['params'] = params 208 | return self.api.insert_into_knowledge_base( 209 | self.project.name, 210 | self.name, 211 | data=data, 212 | ) 213 | 214 | def insert_query(self, data: Query, params: dict = None): 215 | """ 216 | Insert data to knowledge base using query 217 | 218 | >>> my_kb.insert(server.databases.example_db.tables.houses_sales.filter(type='house')) 219 | 220 | Data will be if id (defined by id_column param, see create knowledge base) is already exists in knowledge base 221 | it will be replaced 222 | 223 | :param data: Dataframe or Query object or dict. 
224 | :param params: Runtime parameters for KB 225 | """ 226 | if is_saving(): 227 | # generate insert from select query 228 | if data.database is not None: 229 | ast_query = Insert( 230 | table=self.table_name, 231 | from_select=query_to_native_query(data) 232 | ) 233 | sql = ast_query.to_string() 234 | else: 235 | sql = f'INSERT INTO {self.table_name.to_string()} ({data.sql})' 236 | 237 | # don't execute it right now, return query object 238 | return Query(self, sql, self.database) 239 | 240 | # query have to be in context of mindsdb project 241 | data = {'query': data.sql} 242 | if params: 243 | data['params'] = params 244 | self.api.insert_into_knowledge_base( 245 | self.project.name, 246 | self.name, 247 | data=data 248 | ) 249 | 250 | def completion(self, query, **data): 251 | data['query'] = query 252 | 253 | return self.api.knowledge_base_completion(self.project.name, self.name, data) 254 | 255 | 256 | class KnowledgeBases(CollectionBase): 257 | """ 258 | **Knowledge bases** 259 | 260 | Get list: 261 | 262 | >>> kb_list = server.knowledge_bases.list() 263 | >>> kb = kb_list[0] 264 | 265 | Get by name: 266 | 267 | >>> kb = server.knowledge_bases.get('my_kb') 268 | >>> # or : 269 | >>> kb = server.knowledge_bases.my_kb 270 | 271 | Create: 272 | 273 | >>> kb = server.knowledge_bases.create('my_kb') 274 | 275 | Drop: 276 | 277 | >>> server.knowledge_bases.drop('my_kb') 278 | 279 | """ 280 | 281 | def __init__(self, project, api): 282 | self.project = project 283 | self.api = api 284 | 285 | def list(self) -> List[KnowledgeBase]: 286 | """ 287 | 288 | Get list of knowledge bases inside of project: 289 | 290 | >>> kb_list = project.knowledge_bases.list() 291 | 292 | :return: list of knowledge bases 293 | """ 294 | 295 | return [ 296 | KnowledgeBase(self.api, self.project, item) 297 | for item in self.api.list_knowledge_bases(self.project.name) 298 | ] 299 | 300 | def get(self, name: str) -> KnowledgeBase: 301 | """ 302 | Get knowledge base by name 303 | 304 | 
:param name: name of the knowledge base 305 | :return: KnowledgeBase object 306 | """ 307 | 308 | data = self.api.get_knowledge_base(self.project.name, name) 309 | return KnowledgeBase(self.api, self.project, data) 310 | 311 | def create( 312 | self, 313 | name: str, 314 | model: Model = None, 315 | storage: Table = None, 316 | metadata_columns: list = None, 317 | content_columns: list = None, 318 | id_column: str = None, 319 | params: dict = None, 320 | ) -> Union[KnowledgeBase, Query]: 321 | """ 322 | 323 | Create knowledge base: 324 | 325 | >>> kb = server.knowledge_bases.create( 326 | ... 'my_kb', 327 | ... model=server.models.emb_model, 328 | ... storage=server.databases.pvec.tables.tbl1, 329 | ... metadata_columns=['date', 'author'], 330 | ... content_columns=['review', 'description'], 331 | ... id_column='number', 332 | ... params={'a': 1} 333 | ...) 334 | 335 | :param name: name of the knowledge base 336 | :param model: embedding model, optional. Default: 'sentence_transformers' will be used (defined in mindsdb server) 337 | :param storage: vector storage, optional. Default: chromadb database will be created 338 | :param metadata_columns: columns to use as metadata, optional. Default: all columns which are not content and id 339 | :param content_columns: columns to use as content, optional. Default: all columns except id column 340 | :param id_column: the column to use as id, optinal. 
Default: 'id', if exists 341 | :param params: other parameters to knowledge base 342 | :return: created KnowledgeBase object 343 | """ 344 | 345 | params_out = {} 346 | 347 | if metadata_columns is not None: 348 | params_out['metadata_columns'] = metadata_columns 349 | 350 | if content_columns is not None: 351 | params_out['content_columns'] = content_columns 352 | 353 | if id_column is not None: 354 | params_out['id_column'] = id_column 355 | 356 | if params is not None: 357 | params_out.update(params) 358 | 359 | if model is not None: 360 | model = model.name 361 | 362 | payload = { 363 | 'name': name, 364 | 'model': model, 365 | 'params': params_out 366 | } 367 | 368 | if storage is not None: 369 | payload['storage'] = { 370 | 'database': storage.db.name, 371 | 'table': storage.name 372 | } 373 | 374 | self.api.create_knowledge_base(self.project.name, data=payload) 375 | 376 | return self.get(name) 377 | 378 | def drop(self, name: str): 379 | """ 380 | 381 | :param name: 382 | :return: 383 | """ 384 | 385 | return self.api.delete_knowledge_base(self.project.name, name) 386 | -------------------------------------------------------------------------------- /mindsdb_sdk/ml_engines.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import List, Union 3 | 4 | from mindsdb_sql_parser.ast import Show, Identifier 5 | from mindsdb_sql_parser.ast.mindsdb import CreateMLEngine, DropMLEngine 6 | 7 | from mindsdb_sdk.utils.objects_collection import CollectionBase 8 | 9 | from .handlers import Handler 10 | 11 | @dataclass 12 | class MLEngine: 13 | """ 14 | :meta private: 15 | """ 16 | name: str 17 | handler: str 18 | connection_data: dict 19 | 20 | 21 | class MLEngines(CollectionBase): 22 | """ 23 | 24 | **ML engines collection** 25 | 26 | Examples of usage: 27 | 28 | Get list 29 | 30 | >>> ml_engines = con.ml_engines.list() 31 | 32 | Get 33 | 34 | >>> openai_engine = 
con.ml_engines.openai1 35 | 36 | Create 37 | 38 | >>> con.ml_engines.create( 39 | ... 'openai1', 40 | ... 'openai', 41 | ... connection_data={'api_key': '111'} 42 | ...) 43 | 44 | Drop 45 | 46 | >>> con.ml_engines.drop('openai1') 47 | 48 | Upload BYOM model. 49 | After uploading a new ml engin will be availbe to create new model from it. 50 | 51 | >>> model_code = open('/path/to/model/code').read() 52 | >>> model_requirements = open('/path/to/model/requirements').read() 53 | >>> ml_engine = con.ml_engines.create_byom( 54 | ... 'my_byom_engine', 55 | ... code=model_code, 56 | ... requirements=model_requirements 57 | ...) 58 | 59 | """ 60 | 61 | def __init__(self, api): 62 | self.api = api 63 | 64 | def list(self) -> List[MLEngine]: 65 | """ 66 | Returns list of ml engines on server 67 | 68 | :return: list of ml engines 69 | """ 70 | 71 | ast_query = Show(category='ml_engines') 72 | 73 | df = self.api.sql_query(ast_query.to_string()) 74 | # columns to lower case 75 | cols_map = {i: i.lower() for i in df.columns} 76 | df = df.rename(columns=cols_map) 77 | 78 | return [ 79 | MLEngine(**item) 80 | for item in df.to_dict('records') 81 | ] 82 | 83 | def get(self, name: str) -> MLEngine: 84 | """ 85 | Get ml engine by name 86 | 87 | :param name 88 | :return: ml engine object 89 | """ 90 | name = name.lower() 91 | for item in self.list(): 92 | if item.name == name: 93 | return item 94 | raise AttributeError(f"MLEngine doesn't exist {name}") 95 | 96 | def create(self, name: str, handler: Union[str, Handler], connection_data: dict = None) -> MLEngine: 97 | """ 98 | Create new ml engine and return it 99 | 100 | :param name: ml engine name, string 101 | :param handler: handler name, string or Handler 102 | :param connection_data: parameters for ml engine, dict, optional 103 | :return: created ml engine object 104 | """ 105 | 106 | if isinstance(handler, Handler): 107 | handler = handler.name 108 | 109 | ast_query = CreateMLEngine(Identifier(name), handler, 
params=connection_data) 110 | 111 | self.api.sql_query(ast_query.to_string()) 112 | 113 | return MLEngine(name, handler, connection_data) 114 | 115 | def create_byom(self, name: str, code: str, requirements: Union[str, List[str]] = None): 116 | """ 117 | Create new BYOM ML engine and return it 118 | 119 | :param code: model python code in string 120 | :param requirements: requirements for model. Optional if there is no special requirements. 121 | It can be content of 'requirement.txt' file or list of strings (item for every requirement). 122 | :return: created BYOM ml engine object 123 | """ 124 | 125 | if requirements is None: 126 | requirements = '' 127 | elif isinstance(requirements, list): 128 | requirements = '\n'.join(requirements) 129 | 130 | self.api.upload_byom(name, code, requirements) 131 | 132 | return MLEngine(name, 'byom', {}) 133 | 134 | def drop(self, name: str): 135 | """ 136 | Drop ml engine by name 137 | 138 | :param name: name 139 | """ 140 | ast_query = DropMLEngine(Identifier(name)) 141 | 142 | self.api.sql_query(ast_query.to_string()) 143 | 144 | -------------------------------------------------------------------------------- /mindsdb_sdk/models.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import time 4 | from typing import List, Union 5 | 6 | import pandas as pd 7 | 8 | from mindsdb_sql_parser.ast.mindsdb import CreatePredictor, DropPredictor 9 | from mindsdb_sql_parser.ast.mindsdb import RetrainPredictor, FinetunePredictor 10 | from mindsdb_sql_parser.ast import Identifier, Select, Star, Join, Describe, Set 11 | from mindsdb_sql_parser import parse_sql 12 | from mindsdb_sql_parser.exceptions import ParsingException 13 | 14 | from .ml_engines import MLEngine 15 | 16 | from mindsdb_sdk.utils.objects_collection import CollectionBase 17 | from mindsdb_sdk.utils.sql import dict_to_binary_op, query_to_native_query 18 | from mindsdb_sdk.utils.context import 
is_saving 19 | 20 | from .query import Query 21 | 22 | 23 | class Model: 24 | """ 25 | 26 | Versions 27 | 28 | List model versions 29 | 30 | >>> model.list_versions() 31 | 32 | 33 | Get info 34 | 35 | >>> print(model.get_status()) 36 | >>> print(model.data) 37 | 38 | Update model data from server 39 | 40 | >>> model.refresh() 41 | 42 | **Usng model** 43 | 44 | Dataframe on input 45 | 46 | >>> result_df = model.predict(df_rental) 47 | >>> result_df = model.predict(df_rental, params={'a': 'q'}) 48 | 49 | Dict on input 50 | 51 | >>> result_df = model.predict({'n_rooms': 2}) 52 | 53 | Deferred query on input 54 | 55 | >>> result_df = model.predict(query, params={'': ''}) 56 | 57 | Time series prediction 58 | 59 | >>> query = database.query('select * from table1 where type="house" and saledate>latest') 60 | >>> model.predict(query) 61 | 62 | The join model with table in raw query 63 | 64 | >>> result_df = project.query(''' 65 | ... SELECT m.saledate as date, m.ma as forecast 66 | ... FROM mindsdb.house_sales_model as m 67 | ... JOIN example_db.demo_data.house_sales as t 68 | ... WHERE t.saledate > LATEST AND t.type = 'house' 69 | ... AND t.bedrooms=2 70 | ... 
    def __init__(self, project, data):
        # Project the model belongs to.
        self.project = project

        # Raw model metadata as returned by the server ('name', 'status', 'error', ...).
        self.data = data
        self.name = data['name']
        # None means "active version"; the ModelVersion subclass sets a concrete number.
        self.version = None

    def __repr__(self):
        # e.g. Model(my_model, version=2, status=complete)
        version = ''
        if self.version is not None:
            version = f', version={self.version}'
        return f'{self.__class__.__name__}({self.name}{version}, status={self.data.get("status")})'

    def _get_identifier(self):
        # Fully qualified identifier: <project>.<model>[.<version>]
        parts = [self.project.name, self.name]
        if self.version is not None:
            parts.append(str(self.version))
        return Identifier(parts=parts)

    def predict(self, data: Union[pd.DataFrame, Query, dict], params: dict = None) -> Union[pd.DataFrame, Query]:
        """
        Make prediction using model

        if data is dataframe
        it uses /model/predict http method and sends dataframe over it

        if data is select query with one table
        it replaces table to join table and predictor and sends query over sql/query http method

        if data is select from join or other complex query it modifies query to:
        'select from (input query) join model' and sends it over sql/query http method

        :param data: dataframe or Query object as input to predictor
        :param params: parameters for predictor, optional
        :return: dataframe with result of prediction
        """

        if isinstance(data, Query):
            # try to parse; an unparseable query falls back to the native-query path below
            try:
                ast_query = parse_sql(data.sql, dialect='mindsdb')
            except ParsingException:
                ast_query = None

            # NOTE: automatic injection of the model JOIN into a simple SELECT is
            # disabled for now; the input query is wrapped into a subselect instead.

            model_identifier = self._get_identifier()
            model_identifier.alias = Identifier('m')

            if data.database is not None or ast_query is None or not isinstance(ast_query, Select):
                # use native query: SELECT m.* FROM (native query) t JOIN model m
                native_query = query_to_native_query(data)
                native_query.parentheses = True
                native_query.alias = Identifier('t')
                upper_query = Select(
                    targets=[Identifier(parts=['m', Star()])],
                    from_table=Join(
                        join_type='join',
                        left=native_query,
                        right=model_identifier
                    )
                )
            else:
                # wrap query into a subselect and join it with the model
                model_identifier = self._get_identifier()
                model_identifier.alias = Identifier('m')

                ast_query.parentheses = True
                ast_query.alias = Identifier('t')

                upper_query = Select(
                    targets=[Identifier(parts=['m', Star()])],
                    from_table=Join(
                        join_type='join',
                        left=ast_query,
                        right=model_identifier
                    )
                )
            if params is not None:
                upper_query.using = params
            # execute in query's database
            sql = upper_query.to_string()
            if is_saving():
                # inside a 'with job' block: defer execution, return a Query object
                return Query(self, sql)

            return self.project.api.sql_query(sql, database=None)

        elif isinstance(data, dict):
            # single row: wrap into a one-row dataframe and use the predict endpoint
            data = pd.DataFrame([data])
            return self.project.api.model_predict(self.project.name, self.name, data,
                                                  params=params, version=self.version)
        elif isinstance(data, pd.DataFrame):
            return self.project.api.model_predict(self.project.name, self.name, data,
                                                  params=params, version=self.version)
        else:
            raise ValueError('Unknown input')

    def wait_complete(self):
        """
        Poll model status until training finishes.

        Checks roughly every 0.3s, up to 400 times (~2 minutes).
        Raises RuntimeError if training ended with an error.
        NOTE(review): if the model is still 'training' after the last poll, the
        method returns silently — callers may want to re-check get_status().
        """

        for i in range(400):
            time.sleep(0.3)

            status = self.get_status()
            if status in ('generating', 'training'):
                continue
            elif status == 'error':
                raise RuntimeError(f'Training failed: {self.data["error"]}')
            else:
                break

    def get_status(self) -> str:
        """
        Refresh model data and return status of model

        :return: model status
        """
        self.refresh()
        return self.data['status']

    def refresh(self):
        """
        Refresh model data from mindsdb server.
        Model data can be changed during training process.

        :return: model data
        """
        model = self.project.get_model(self.name, self.version)
        self.data = model.data
        return self.data
    def retrain(self,
                query: Union[str, Query] = None,
                database: str = None,
                options: dict = None,
                engine: str = None) -> Union[Model, ModelVersion]:
        """
        Call retrain of the model

        :param query: sql string or Query object to get data for retraining, optional
        :param database: database to get data for retraining, optional
        :param options: parameters for retraining model, optional
        :param engine: ml engine, optional
        :return: Model object
        """
        return self._retrain(ast_class=RetrainPredictor,
                             query=query, database=database,
                             options=options, engine=engine)

    def _retrain(self,
                 ast_class,
                 query: Union[str, Query] = None,
                 database: str = None,
                 options: dict = None,
                 engine: str = None):
        # Shared implementation for retrain()/finetune():
        # builds a RETRAIN/FINETUNE statement and executes it (or defers it in saving mode).
        if isinstance(query, Query):
            # a deferred query carries its own database
            database = query.database
            query = query.sql
        elif isinstance(query, pd.DataFrame):
            raise NotImplementedError('Dataframe as input for training model is not supported yet')

        if database is not None:
            database = Identifier(database)

        if options is None:
            options = {}
        if engine is not None:
            options['engine'] = engine

        ast_query = ast_class(
            name=self._get_identifier(),
            query_str=query,
            integration_name=database,
            using=options or None,
        )
        sql = ast_query.to_string()

        if is_saving():
            # inside a 'with job' block: defer execution
            return Query(self, sql)

        data = self.project.api.sql_query(sql)
        # NOTE(review): sql_query appears to return a DataFrame elsewhere in this
        # file (see Models.create, which uses df.iloc[0]); iterating .items() here
        # would yield (column, Series) pairs rather than a row dict — verify.
        data = {k.lower(): v for k, v in data.items()}

        # return a new instance of the same class (Model or ModelVersion)
        base_class = self.__class__
        return base_class(self.project, data)

    def describe(self, type: str = None) -> Union[pd.DataFrame, Query]:
        """
        Return description of the model

        :param type: describe type (for lightwood is models, ensemble, features), optional
        :return: dataframe with result of description
        """
        # describing a specific version is not supported by the server
        if self.version is not None:
            raise NotImplementedError

        identifier = self._get_identifier()
        if type is not None:
            # DESCRIBE <project>.<model>.<type>
            identifier.parts.append(type)
        ast_query = Describe(identifier)

        sql = ast_query.to_string()
        if is_saving():
            return Query(self, sql)

        return self.project.api.sql_query(sql)

    def list_versions(self) -> List[ModelVersion]:
        """
        Show list of model versions

        :return: list of ModelVersion objects
        """
        return self.project.list_models(with_versions=True, name=self.name)

    def get_version(self, num: int) -> ModelVersion:
        """
        Get model version by number

        :param num: version number
        :return: ModelVersion object
        :raises ValueError: if the version is not found
        """

        num = int(num)
        for m in self.project.list_models(with_versions=True, name=self.name):
            if m.version == num:
                return m
        raise ValueError('Version is not found')

    def drop_version(self, num: int) -> ModelVersion:
        """
        Drop version of the model

        >>> models.rentals_model.drop_version(version=10)

        :param num: version to drop
        """

        return self.project.drop_model_version(self.name, num)
class ModelVersion(Model):
    """
    A concrete version of a model; `version` is always set.
    """

    def __init__(self, project, data):

        super().__init__(project, data)

        self.version = data['version']


class Models(CollectionBase):
    """

    **Models**

    Get:

    >>> all_models = models.list()
    >>> model = all_models[0]

    Get version:

    >>> all_models = models.list(with_versions=True)
    >>> model = all_models[0]

    By name:

    >>> model = models.get('model1')
    >>> model = models.get('model1', version=2)

    """

    def __init__(self, project, api):
        self.project = project
        self.api = api

    def create(
        self,
        name: str,
        predict: str = None,
        engine: Union[str, MLEngine] = None,
        query: Union[str, Query] = None,
        database: str = None,
        options: dict = None,
        timeseries_options: dict = None, **kwargs
    ) -> Union[Model, Query]:
        """
        Create new model in project and return it

        If query/database is passed, it will be executed on mindsdb side

        Create, using params and query as string

        >>> model = models.create(
        ...   'rentals_model',
        ...   predict='price',
        ...   engine='lightwood',
        ...   database='example_db',
        ...   query='select * from table',
        ...   options={
        ...       'module': 'LightGBM'
        ...   },
        ...   timeseries_options={
        ...       'order': 'date',
        ...       'group': ['a', 'b']
        ...   }
        ...)

        Create, using deferred query. 'query' will be executed and converted to dataframe on mindsdb backend.

        >>> query = databases.db.query('select * from table')
        >>> model = models.create(
        ...   'rentals_model',
        ...   predict='price',
        ...   query=query,
        ...)

        :param name: name of the model
        :param predict: prediction target
        :param engine: ml engine for new model, default is mindsdb
        :param query: sql string or Query object to get data for training of model, optional
        :param database: database to get data for training, optional
        :param options: parameters for model, optional
        :param timeseries_options: parameters for forecasting model
        :return: created Model object, it can still be in training state
        :raises RuntimeError: if the server returned no record for the created model
        """
        if isinstance(query, Query):
            # a deferred query carries its own database
            database = query.database
            query = query.sql
        elif isinstance(query, pd.DataFrame):
            raise NotImplementedError('Dataframe as input for training model is not supported yet')

        if database is not None:
            database = Identifier(database)

        targets = [Identifier(predict)] if predict is not None else None

        ast_query = CreatePredictor(
            name=Identifier(parts=[self.project.name, name]),
            query_str=query,
            integration_name=database,
            targets=targets,
        )

        if timeseries_options is not None:
            # validate time series options before applying them
            allowed_keys = ['group', 'order', 'window', 'horizon']
            for key in timeseries_options.keys():
                if key not in allowed_keys:
                    raise AttributeError(f"Unexpected time series option: {key}")

            if 'group' in timeseries_options:
                group = timeseries_options['group']
                if not isinstance(group, list):
                    group = [group]
                ast_query.group_by = [Identifier(i) for i in group]
            if 'order' in timeseries_options:
                ast_query.order_by = [Identifier(timeseries_options['order'])]
            if 'window' in timeseries_options:
                ast_query.window = timeseries_options['window']
            if 'horizon' in timeseries_options:
                ast_query.horizon = timeseries_options['horizon']

        if options is None:
            options = {}
        # options and kwargs are treated the same: both go into USING
        options.update(kwargs)

        if engine is not None:
            if isinstance(engine, MLEngine):
                engine = engine.name

            options['engine'] = engine
        ast_query.using = options

        sql = ast_query.to_string()

        if is_saving():
            # inside a 'with job' block: defer execution
            return Query(self, sql)

        df = self.project.api.sql_query(sql)
        if len(df) == 0:
            # previously an empty result fell through silently; fail loudly instead
            raise RuntimeError(f"Model creation returned no data: {name}")
        data = dict(df.iloc[0])
        # normalize column names to lowercase
        data = {k.lower(): v for k, v in data.items()}

        return Model(self.project, data)

    def get(self, name: str, version: int = None) -> Union[Model, ModelVersion]:
        """
        Get model by name from project

        If version is passed it returns a ModelVersion object with that specific version

        :param name: name of the model
        :param version: version of model, optional
        :return: Model or ModelVersion object
        :raises AttributeError: if the model doesn't exist
        :raises RuntimeError: if several models match the same name/version
        """
        if version is not None:
            ret = self.list(with_versions=True, name=name, version=version)
        else:
            ret = self.list(name=name)
        if len(ret) == 0:
            raise AttributeError("Model doesn't exist")
        elif len(ret) == 1:
            return ret[0]
        else:
            raise RuntimeError('Several models with the same name/version')

    def drop(self, name: str):
        """
        Drop model from project with all versions

        >>> models.drop('rentals_model')

        :param name: name of the model
        """
        ast_query = DropPredictor(name=Identifier(parts=[self.project.name, name]))
        sql = ast_query.to_string()
        if is_saving():
            return Query(self, sql)

        self.project.api.sql_query(sql)

    def list(self, with_versions: bool = False,
             name: str = None,
             version: int = None) -> List[Union[Model, ModelVersion]]:
        """
        List models (or model versions) in project

        If with_versions = True
        it shows all models with version (executes 'select * from models_versions')

        Otherwise it shows only models (executes 'select * from models')

        :param with_versions: show model versions
        :param name: to show models or versions only with selected name, optional
        :param version: to show model or versions only with selected version, optional
        :return: list of Model or ModelVersion objects
        """

        model_class = Model

        filters = {}
        if name is not None:
            filters['NAME'] = name
        if version is not None:
            filters['VERSION'] = version

        if with_versions:
            model_class = ModelVersion
        else:
            # only active versions are shown by default
            filters['ACTIVE'] = '1'

        ast_query = Select(
            targets=[Star()],
            from_table=Identifier('models'),
            where=dict_to_binary_op(filters)
        )
        df = self.project.query(ast_query.to_string()).fetch()

        # normalize column names to lower case
        cols_map = {i: i.lower() for i in df.columns}
        df = df.rename(columns=cols_map)

        return [
            model_class(self.project, item)
            for item in df.to_dict('records')
        ]
    def __init__(self, server, api, name):
        """
        :param server: Server instance this project belongs to
        :param api: REST API connector used to run queries on the server
        :param name: name of the project on the mindsdb server
        """
        self.name = name
        self.api = api
        self.server = server

        self.models = Models(self, api)

        # old api: method aliases kept for backward compatibility
        self.get_model = self.models.get
        self.list_models = self.models.list
        self.create_model = self.models.create
        self.drop_model = self.models.drop

        self.views = Views(self, api)

        # old api
        self.get_view = self.views.get
        self.list_views = self.views.list
        self.create_view = self.views.create
        self.drop_view = self.views.drop

        self.jobs = Jobs(self, api)

        # old api
        self.get_job = self.jobs.get
        self.list_jobs = self.jobs.list
        self.create_job = self.jobs.create
        self.drop_job = self.jobs.drop

        self.knowledge_bases = KnowledgeBases(self, api)

        self.skills = Skills(self, api)
        self.agents = Agents(self, api)

        self.minds = self.agents  # alias: minds are served by the agents collection
| """ 95 | return Query(self.api, sql, database=self.name) 96 | 97 | def drop_model_version(self, name: str, version: int): 98 | """ 99 | Drop version of the model 100 | 101 | :param name: name of the model 102 | :param version: version to drop 103 | """ 104 | ast_query = DropPredictor(Identifier(parts=[name, str(version)])) 105 | 106 | self.query(ast_query.to_string()).fetch() 107 | 108 | 109 | class Projects(CollectionBase): 110 | """ 111 | Projects 112 | ---------- 113 | 114 | list of projects 115 | 116 | >>> projects.list() 117 | 118 | create 119 | 120 | >>> project = projects.create('proj') 121 | 122 | drop 123 | 124 | >>> projects.drop('proj') 125 | 126 | get existing 127 | 128 | >>> projects.get('proj') 129 | 130 | by attribute 131 | >>> projects.proj 132 | 133 | """ 134 | 135 | def __init__(self, server, api): 136 | self.api = api 137 | self.server = server 138 | 139 | def _list_projects(self): 140 | data = self.api.sql_query("select NAME from information_schema.databases where TYPE='project'") 141 | return list(data.NAME) 142 | 143 | def list(self) -> List[Project]: 144 | """ 145 | Show list of project on server 146 | 147 | :return: list of Project objects 148 | """ 149 | # select * from information_schema.databases where TYPE='project' 150 | return [Project(self.server, self.api, name) for name in self._list_projects()] 151 | 152 | def get(self, name: str = 'mindsdb') -> Project: 153 | """ 154 | Get Project by name 155 | 156 | :param name: name of project 157 | :return: Project object 158 | """ 159 | if name not in self._list_projects(): 160 | raise AttributeError("Project doesn't exist") 161 | return Project(self.server, self.api, name) 162 | 163 | def create(self, name: str) -> Project: 164 | """ 165 | Create new project and return it 166 | 167 | :param name: name of the project 168 | :return: Project object 169 | """ 170 | 171 | ast_query = CreateDatabase( 172 | name=Identifier(name), 173 | engine='mindsdb', 174 | parameters={} 175 | ) 176 | 177 | 
class Query:
    """
    Wrapper around a SQL string bound to an api connector.

    Holds the query text and, optionally, the database it should run against.
    Nothing is sent to the server until :meth:`fetch` is called.
    """

    def __init__(self, api, sql, database=None):
        self.api = api

        self.sql = sql
        self.database = database

    def __repr__(self):
        # one-line preview of the query, truncated to 40 characters
        preview = self.sql.replace('\n', ' ')
        preview = preview if len(preview) <= 40 else preview[:37] + '...'

        return f'{self.__class__.__name__}({preview})'

    def fetch(self) -> pd.DataFrame:
        """
        Execute the query on the mindsdb server and return its result.

        :return: dataframe with the result set
        """
        return self.api.sql_query(self.sql, self.database)
    def __init__(self, api, skills: Skills = None, agents: Agents = None):
        """
        :param api: REST API connector to the mindsdb server
        :param skills: NOTE(review): accepted but never read in this constructor —
            Project.__init__ builds its own Skills; confirm whether it can be dropped
        :param agents: NOTE(review): likewise accepted but unused here
        """
        # server is also mindsdb project: set up server-level collections first,
        # then initialise the base Project (named 'mindsdb') with itself as server
        self.databases = Databases(api)
        self.ml_engines = MLEngines(api)
        super().__init__(self, api, 'mindsdb')

        self.projects = Projects(self, api)

        # old api: method aliases kept for backward compatibility
        self.get_project = self.projects.get
        self.list_projects = self.projects.list
        self.create_project = self.projects.create
        self.drop_project = self.projects.drop

        # old api
        self.get_database = self.databases.get
        self.list_databases = self.databases.list
        self.create_database = self.databases.create
        self.drop_database = self.databases.drop

        self.ml_handlers = Handlers(self.api, 'ml')
        self.data_handlers = Handlers(self.api, 'data')
It could content version 54 | Example of getting version for local: 55 | 56 | >>> print(server.status()['mindsdb_version']) 57 | 58 | :return: server status info 59 | """ 60 | return self.api.status() 61 | 62 | def __repr__(self): 63 | return f'{self.__class__.__name__}({self.api.url})' 64 | 65 | 66 | -------------------------------------------------------------------------------- /mindsdb_sdk/skills.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from mindsdb_sdk.utils.objects_collection import CollectionBase 4 | 5 | 6 | class Skill(): 7 | """Represents a MindsDB skill 8 | 9 | Working with skills: 10 | 11 | Get a skill by name: 12 | 13 | >>> skill = skills.get('my_skill') 14 | 15 | List all skills: 16 | 17 | >>> skills = skills.list() 18 | 19 | Create a new SQL skill: 20 | 21 | >>> text_to_sql_skill = skills.create('text_to_sql', 'sql', { 'tables': ['my_table'], 'database': 'my_database', 'description': 'my_description'}) 22 | 23 | Update a skill: 24 | 25 | >>> skill.params = { 'tables': ['new_table'], 'database': 'new_database' } 26 | >>> updated_skill = skills.update('my_skill', skill) 27 | 28 | Delete a skill by name 29 | 30 | >>> skills.delete('my_skill') 31 | """ 32 | def __init__( 33 | self, 34 | name: str, 35 | type: str, 36 | params: dict = None): 37 | self.name = name 38 | self.type = type 39 | self.params = params or {} 40 | 41 | def __eq__(self, other): 42 | if self.name != other.name: 43 | return False 44 | if self.type != other.type: 45 | return False 46 | return self.params == other.params 47 | 48 | def __repr__(self): 49 | return f'{self.__class__.__name__}(name: {self.name})' 50 | 51 | @classmethod 52 | def from_json(cls, json: dict): 53 | name = json['name'] 54 | type = json['type'] 55 | params = json['params'] 56 | if json['type'] == 'sql': 57 | return SQLSkill(name, params['tables'], params['database'], params.get('description', '')) 58 | if json['type'] == 'retrieval': 59 
class SQLSkill(Skill):
    """A skill that lets an agent run text-to-SQL over MindsDB databases."""

    def __init__(self, name: str, tables: List[str], database: str, description: str):
        super().__init__(
            name,
            'sql',
            {
                'database': database,
                'tables': tables,
                'description': description
            }
        )
101 | 102 | :param name: name of the skill 103 | 104 | :return: skill with the given name 105 | """ 106 | data = self.api.skill(self.project.name, name) 107 | return Skill.from_json(data) 108 | 109 | def create(self, name: str, type: str, params: dict = None) -> Skill: 110 | """ 111 | Create new skill and return it 112 | 113 | :param name: Name of the skill to be created 114 | :param type: Type of the skill to be created 115 | :param params: Parameters for the skill to be created 116 | 117 | :return: created skill object 118 | """ 119 | _ = self.api.create_skill(self.project.name, name, type, params) 120 | if type == 'sql': 121 | return SQLSkill(name, params['tables'], params['database'], params['description']) 122 | return Skill(name, type, params) 123 | 124 | def update(self, name: str, updated_skill: Skill) -> Skill: 125 | """ 126 | Update a skill by name. 127 | 128 | param name: Name of the skill to be updated 129 | :param updated_skill: Skill with updated fields 130 | 131 | :return: updated skillobject 132 | """ 133 | data = self.api.update_skill(self.project.name, name, updated_skill.name, updated_skill.type, updated_skill.params) 134 | return Skill.from_json(data) 135 | 136 | def drop(self, name: str): 137 | """ 138 | Drop a skill by name. 
    def __init__(self, db, name):
        """
        :param db: Database object this table belongs to
        :param name: name of the table inside that database
        """
        # initialise the base Query with an empty sql string and no database;
        # the actual sql is generated from the filter state by _update_query()
        super().__init__(db.api, '', None)
        self.name = name
        self.table_name = Identifier(parts=[db.name, name])
        self.db = db
        self._filters = {}          # column -> value equality filters (see filter())
        self._limit = None          # row limit, set via limit()
        self._track_column = None   # column used by track() ('LAST' keyword)
        self._update_query()
query._update_query() 58 | return query 59 | 60 | def limit(self, val: int): 61 | """ 62 | Applies limit condition to table query 63 | 64 | :param val: limit size 65 | :return: Table object 66 | """ 67 | query = copy.deepcopy(self) 68 | query._limit = val 69 | query._update_query() 70 | return query 71 | 72 | def track(self, column): 73 | """ 74 | Apply tracking column to table. ('LAST' keyword in mindsdb) 75 | First call returns nothing 76 | The next calls return new records since previous call (where value of track_column is greater) 77 | 78 | Example: 79 | 80 | >>> query = con.databases.my_db.tables.sales.filter(type='house').track('created_at') 81 | >>> # first call returns no records 82 | >>> df = query.fetch() 83 | >>> # second call returns rows with created_at is greater since previous fetch 84 | >>> df = query.fetch() 85 | 86 | :param column: column to track new data from table. 87 | :return: Table object 88 | """ 89 | query = copy.deepcopy(self) 90 | query._track_column = column 91 | 92 | query._update_query() 93 | return query 94 | 95 | def _update_query(self): 96 | where = dict_to_binary_op(self._filters) 97 | if self._track_column is not None: 98 | condition = BinaryOperation(op='>', args=[Identifier(self._track_column), Last()]) 99 | where = add_condition(where, condition) 100 | 101 | ast_query = Select( 102 | targets=[Star()], 103 | from_table=self.table_name, 104 | where=where 105 | ) 106 | if self._limit is not None: 107 | ast_query.limit = Constant(self._limit) 108 | self.sql = ast_query.to_string() 109 | 110 | def insert(self, query: Union[pd.DataFrame, Query]): 111 | """ 112 | Insert data from query of dataframe 113 | :param query: dataframe of 114 | :return: 115 | """ 116 | 117 | if isinstance(query, pd.DataFrame): 118 | # insert data 119 | data_split = query.to_dict('split') 120 | 121 | ast_query = Insert( 122 | table=self.table_name, 123 | columns=data_split['columns'], 124 | values=data_split['data'] 125 | ) 126 | 127 | sql = 
ast_query.to_string() 128 | 129 | elif isinstance(query, Query): 130 | # insert from select 131 | 132 | if query.database is not None: 133 | # use native query 134 | ast_query = Insert( 135 | table=self.table_name, 136 | from_select=query_to_native_query(query) 137 | ) 138 | sql = ast_query.to_string() 139 | else: 140 | sql = f'INSERT INTO {self.table_name.to_string()} ({query.sql})', 141 | else: 142 | raise ValueError(f'Invalid query type: {query}') 143 | 144 | if is_saving(): 145 | return Query(self, sql) 146 | 147 | self.api.sql_query(sql) 148 | 149 | def delete(self, **kwargs): 150 | """ 151 | Deletes record from table using filters 152 | 153 | >>> table.delete(a=1, b=2) 154 | 155 | :param kwargs: filter 156 | """ 157 | 158 | ast_query = Delete( 159 | table=self.table_name, 160 | where=dict_to_binary_op(kwargs) 161 | ) 162 | sql = ast_query.to_string() 163 | 164 | if is_saving(): 165 | return Query(self, sql) 166 | 167 | self.api.sql_query(sql) 168 | 169 | def update(self, values: Union[dict, Query], on: list = None, filters: dict = None): 170 | ''' 171 | Update table by condition of from other table. 172 | 173 | If 'values' is a dict: 174 | it will be an update by condition 175 | 'filters' is required 176 | used command: update table set a=1 where x=1 177 | 178 | If 'values' is a Query: 179 | it will be an update from select 180 | 'on' is required 181 | used command: update table on a,b from (query) 182 | 183 | :param values: input for update, can be dict or query 184 | :param on: list of column to map subselect to table ['a', 'b', ...] 
185 | :param filters: dict to filter updated rows, {'column': 'value', ...} 186 | 187 | ''' 188 | 189 | if isinstance(values, Query): 190 | # is update from select 191 | if on is None: 192 | raise ValueError('"on" parameter is required for update from query') 193 | 194 | # insert from select 195 | if values.database is not None: 196 | ast_query = Update( 197 | table=self.table_name, 198 | keys=[Identifier(col) for col in on], 199 | from_select=query_to_native_query(values) 200 | ) 201 | sql = ast_query.to_string() 202 | else: 203 | map_cols = ', '.join(on) 204 | sql = f'UPDATE {self.table_name.to_string()} ON {map_cols} FROM ({values.sql})' 205 | 206 | elif isinstance(values, dict): 207 | # is regular update 208 | if filters is None: 209 | raise ValueError('"filters" parameter is required for update') 210 | 211 | update_columns = { 212 | k: Constant(v) 213 | for k, v in values.items() 214 | } 215 | 216 | ast_query = Update( 217 | table=self.table_name, 218 | update_columns=update_columns, 219 | where=dict_to_binary_op(filters) 220 | ) 221 | 222 | sql = ast_query.to_string() 223 | else: 224 | raise NotImplementedError 225 | 226 | if is_saving(): 227 | return Query(self, sql) 228 | 229 | self.api.sql_query(sql) 230 | 231 | 232 | class Tables(CollectionBase): 233 | """ 234 | Wortking with tables: 235 | Get table as Query object 236 | 237 | >>> table = tables.get('table1') 238 | 239 | Filter and limit 240 | 241 | >>> table = table.filter(a=1, b='2') 242 | >>> table = table.limit(3) 243 | 244 | Get content of table as dataframe. 
At that moment query will be sent on server and executed 245 | 246 | >>> df = table.fetch() 247 | 248 | Creating table 249 | 250 | From query: 251 | 252 | >>> table = tables.create('table2', query) 253 | 254 | From other table 255 | 256 | >>> table2 = table.create('table2', table) 257 | 258 | Uploading file 259 | 260 | >>> db = con.databases.files 261 | >>> db.tables.create('filename', dataframe) 262 | 263 | ` Droping table 264 | 265 | >>> db.tables.drop('table2') 266 | """ 267 | 268 | def __init__(self, database, api): 269 | self.database = database 270 | self.api = api 271 | 272 | def _list_tables(self): 273 | df = self.database.query('show tables').fetch() 274 | 275 | # first column 276 | return list(df[df.columns[0]]) 277 | 278 | def list(self) -> List[Table]: 279 | """ 280 | Show list of tables in integration 281 | 282 | :return: list of Table objects 283 | """ 284 | return [Table(self.database, name) for name in self._list_tables()] 285 | 286 | def get(self, name: str) -> Table: 287 | """ 288 | Get table by name 289 | 290 | :param name: name of table 291 | :return: Table object 292 | """ 293 | 294 | return Table(self.database, name) 295 | 296 | def create(self, name: str, query: Union[pd.DataFrame, Query], replace: bool = False) -> Union[Table, Query]: 297 | """ 298 | Create new table and return it. 299 | 300 | On mindsdb server it executes command: 301 | `insert into a (select ...)` 302 | 303 | or if replace is True 304 | `create table a (select ...)` 305 | 306 | 'select ...' 
is extracted from input Query 307 | 308 | :param name: name of table 309 | :param query: Query object 310 | :param replace: if true, 311 | :return: Table object 312 | """ 313 | 314 | if isinstance(query, pd.DataFrame) and self.database.name == 'files': 315 | # now it is only possible for file uploading 316 | self.api.upload_file(name, query) 317 | 318 | return Table(self.database, name) 319 | 320 | if not isinstance(query, Query): 321 | raise NotImplementedError 322 | 323 | # # query can be in different database: wrap to NativeQuery 324 | # ast_query = CreateTable( 325 | # name=Identifier(name), 326 | # is_replace=is_replace, 327 | # from_select=Select( 328 | # targets=[Star()], 329 | # from_table=NativeQuery( 330 | # integration=Identifier(data.database), 331 | # query=data.sql 332 | # ) 333 | # ) 334 | # ) 335 | # self.query(ast_query.to_string()).fetch() 336 | 337 | # call in query database 338 | table = Identifier(parts=[self.database.name, name]) 339 | 340 | if query.database is not None: 341 | # use native query 342 | ast_query = CreateTable( 343 | name=table, 344 | is_replace=replace, 345 | from_select=query_to_native_query(query) 346 | ) 347 | sql = ast_query.to_string() 348 | else: 349 | replace_str = '' 350 | if replace: 351 | replace_str = ' or replace' 352 | 353 | sql = f'create{replace_str} table {table.to_string()} ({query.sql})' 354 | 355 | if is_saving(): 356 | return Query(self, sql) 357 | 358 | self.api.sql_query(sql) 359 | 360 | return Table(self.database, name) 361 | 362 | def drop(self, name: str): 363 | """ 364 | Delete table 365 | 366 | :param name: name of table 367 | """ 368 | table = Identifier(parts=[self.database.name, name]) 369 | 370 | ast_query = DropTables( 371 | tables=[table] 372 | ) 373 | sql = ast_query.to_string() 374 | 375 | if is_saving(): 376 | return Query(self, sql) 377 | self.api.sql_query(sql) 378 | 379 | -------------------------------------------------------------------------------- /mindsdb_sdk/utils/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindsdb/mindsdb_python_sdk/c8c51f2597f04d30e6d16b798a304153e7bd9233/mindsdb_sdk/utils/__init__.py -------------------------------------------------------------------------------- /mindsdb_sdk/utils/agents.py: -------------------------------------------------------------------------------- 1 | import re 2 | import json 3 | import logging 4 | from typing import Dict, Any, Generator, Optional, Tuple 5 | 6 | 7 | class MindsDBSQLStreamParser: 8 | """ 9 | A utility class for parsing SQL queries from MindsDB completion streams. 10 | 11 | This class provides methods to process completion streams, extract SQL queries, 12 | and accumulate full responses. 13 | 14 | Attributes: 15 | logger (logging.Logger): The logger instance for this class. 16 | """ 17 | 18 | def __init__(self, log_level: int = logging.INFO): 19 | """ 20 | Initialize the MindsDBSQLStreamParser. 21 | 22 | Args: 23 | log_level (int, optional): The logging level to use. Defaults to logging.INFO. 24 | """ 25 | self.logger = logging.getLogger(__name__) 26 | self.logger.setLevel(log_level) 27 | 28 | # Create a console handler and set its level 29 | ch = logging.StreamHandler() 30 | ch.setLevel(log_level) 31 | 32 | # Create a formatter 33 | formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') 34 | 35 | # Add the formatter to the handler 36 | ch.setFormatter(formatter) 37 | 38 | # Add the handler to the logger 39 | self.logger.addHandler(ch) 40 | 41 | def stream_and_parse_sql_query(self, completion_stream: Generator[Dict[str, Any], None, None]) -> Generator[ 42 | Dict[str, Optional[str]], None, None]: 43 | """ 44 | Stream and parse the completion stream, yielding output and SQL queries. 45 | 46 | This generator function processes each chunk of the completion stream, 47 | extracts any output and SQL queries, and yields the results. 
48 | 49 | Args: 50 | completion_stream (Generator[Dict[str, Any], None, None]): The input completion stream. 51 | 52 | Yields: 53 | Dict[str, Optional[str]]: A dictionary containing 'output' and 'sql_query' keys. 54 | - 'output': The extracted output string from the chunk, if any. 55 | - 'sql_query': The extracted SQL query string, if found in the chunk. 56 | 57 | Note: 58 | This function will only yield the first SQL query it finds in the stream. 59 | """ 60 | sql_query_found = False 61 | 62 | for chunk in completion_stream: 63 | output = "" 64 | sql_query = None 65 | 66 | # Log full chunk at DEBUG level 67 | self.logger.debug(f"Processing chunk: {json.dumps(chunk, indent=2)}") 68 | 69 | # Log important info at INFO level 70 | if isinstance(chunk, dict): 71 | if 'quick_response' in chunk: 72 | self.logger.info(f"Quick response received: {json.dumps(chunk)}") 73 | 74 | output = chunk.get('output', '') 75 | if output: 76 | self.logger.info(f"Chunk output: {output}") 77 | 78 | if 'messages' in chunk: 79 | for message in chunk['messages']: 80 | if message.get('role') == 'assistant': 81 | self.logger.info(f"Assistant message: {message.get('content', '')}") 82 | if chunk.get('type') == 'sql': 83 | sql_query = chunk['content'] 84 | self.logger.info(f"Generated SQL: {sql_query}") 85 | 86 | elif isinstance(chunk, str): 87 | output = chunk 88 | self.logger.info(f"String chunk received: {chunk}") 89 | 90 | yield { 91 | 'output':output, 92 | 'sql_query':sql_query 93 | } 94 | 95 | def process_stream(self, completion_stream: Generator[Dict[str, Any], None, None]) -> Tuple[str, Optional[str]]: 96 | """ 97 | Process the completion stream and extract the SQL query. 98 | 99 | This method iterates through the stream, accumulates the full response, 100 | logs outputs, and extracts the SQL query when found. 101 | 102 | Args: 103 | completion_stream (Generator[Dict[str, Any], None, None]): The input completion stream. 
from contextvars import ContextVar

# per-context key/value storage used to flag state such as "currently saving"
context_storage = ContextVar('create_context')


def set_context(name: str, value: str):
    """
    Set context value to variable

    :param name: variable name
    :param value: variable value
    """
    # Copy before mutating: the dict held by a ContextVar can be shared with
    # parent/sibling contexts, so mutating it in place would leak the change
    # outside of the current context.
    data = dict(context_storage.get({}))
    data[name] = value

    context_storage.set(data)


def get_context(name: str) -> str:
    """
    Get context value from variable

    :param name: variable name
    :return: variable value, or None if the variable was never set
    """

    data = context_storage.get({})
    return data.get(name)


def set_saving(name: str):
    """
    Set name of saving object to context

    :param name: name of the object
    """
    set_context('saving', name)


def is_saving() -> bool:
    """
    Returns true if an object is being saved at the moment
    """

    return get_context('saving') is not None
# Define the Mind entity
class Mind:
    """
    Mind entity

    Lightweight handle returned by :func:`create_mind`; it only carries the
    mind's name.
    """

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        # same repr convention as the other SDK entities (Project, Skill, ...)
        return f'{self.__class__.__name__}({self.name})'
72 | filters: List[str] = [ ] 73 | 74 | 75 | # Create mind entity util function 76 | def create_mind( 77 | base_url: str, 78 | api_key: str, 79 | name: str, 80 | data_source_configs: List[DataSourceConfig] = None, 81 | model: Optional[str] = None, 82 | ) -> Mind: 83 | """ 84 | Create a mind entity in LiteLLM proxy. 85 | 86 | Args: 87 | base_url (str): MindsDB base URL 88 | api_key (str): MindsDB API key 89 | name (str): Mind name 90 | data_source_configs (List[DataSourceConfig]): Data sources to make available to the mind 91 | model: Model orchestrating the AI reasoning loop 92 | 93 | Returns: 94 | Mind: Mind entity 95 | """ 96 | warnings.simplefilter('always', DeprecationWarning) # turn off filter 97 | warnings.warn( 98 | 'Minds in python SDK are deprecated. Use minds SDK instead (`pip install minds-sdk`)', 99 | category=DeprecationWarning 100 | ) 101 | warnings.simplefilter('default', DeprecationWarning) 102 | 103 | url = f"{base_url.rstrip('/')}/minds" 104 | headers = {"Authorization": f"Bearer {api_key}"} 105 | if data_source_configs is None: 106 | data_source_configs = [] 107 | payload = { 108 | "name": name, 109 | "data_source_configs": [d.model_dump() for d in data_source_configs], 110 | "model": model 111 | } 112 | try: 113 | response = requests.post(url, json=payload, headers=headers) 114 | response.raise_for_status() 115 | except requests.exceptions.HTTPError as e: 116 | try: 117 | error_message = e.response.json() 118 | except Exception: 119 | error_message = str(e) 120 | logger.error(f"Failed to create mind: {error_message}") 121 | raise e 122 | except Exception as e: 123 | logger.error(f"Failed to create mind: {e}") 124 | raise e 125 | 126 | name = response.json()['name'] 127 | 128 | return Mind(name=name) 129 | -------------------------------------------------------------------------------- /mindsdb_sdk/utils/objects_collection.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import 
class CollectionBase:
    """
    Base class for SDK object collections.

    Exposes the items returned by ``self.list()`` both as attributes
    (``collection.item_name`` delegates to ``self.get(name)``) and in
    tab-completion via ``__dir__``. Subclasses are expected to provide
    ``list()`` and ``get()`` (and usually ``create()``/``drop()``).
    """

    def __dir__(self) -> Iterable[str]:
        # Offer the collection API itself plus the item names for autocomplete.
        internal_methods = ['create', 'drop', 'get', 'list']

        items = [item.name for item in self.list()]

        # Keep only names that are valid Python identifiers (can't start with
        # a digit). BUG FIX: use a raw string — '\w' in a plain string is an
        # invalid escape sequence (SyntaxWarning on modern Python).
        items = [i for i in items if re.match(r'^(?![0-9])\w+$', i)]
        return internal_methods + items

    def __getattr__(self, name):
        # Never intercept dunder lookups (pickle/copy/IPython probe them);
        # raising AttributeError keeps default behavior.
        if name.startswith('__'):
            raise AttributeError(name)

        return self.get(name)
DEFAULT_MAX_WAIT = 40
DEFAULT_STOP_AFTER_ATTEMPT = 3

logger = getLogger(__name__)


@retry(
    wait=wait_random_exponential(multiplier=DEFAULT_RETRY_MULTIPLIER, max=DEFAULT_MAX_WAIT),
    # BUG FIX: this previously passed DEFAULT_RETRY_MULTIPLIER (=1), which
    # stopped after a single attempt and disabled retries entirely.
    # DEFAULT_STOP_AFTER_ATTEMPT (=3) is the intended, otherwise-unused limit.
    stop=stop_after_attempt(DEFAULT_STOP_AFTER_ATTEMPT),
)
def chat_completion_request(
    client: openai.OpenAI,
    model: str,
    messages: List[dict],
    tools: List = None,
    tool_choice: ChatCompletionToolChoiceOptionParam = None
):
    """
    Send a chat completion request, retrying with random exponential backoff.

    :param client: configured OpenAI client
    :param model: model name to use for the completion
    :param messages: conversation messages ([{'role': ..., 'content': ...}, ...])
    :param tools: optional tool definitions made available to the model
    :param tool_choice: optional tool-choice constraint

    :return: the ChatCompletion response, or the caught exception object when
             the request fails.
    """
    # NOTE(review): because the exception is caught and *returned* rather than
    # re-raised, the @retry decorator never sees a failure and will not retry.
    # Callers must type-check the return value. Left as-is to preserve the
    # existing caller contract — confirm whether re-raising is intended.
    try:
        response = client.chat.completions.create(
            model=model,
            messages=messages,
            tools=tools,
            tool_choice=tool_choice,
        )
        return response
    except Exception as e:
        logger.warning("Unable to generate ChatCompletion response")
        logger.warning(f"Exception: {e}")
        return e
inspect.isclass(param.annotation): 81 | param_type = param.annotation.__name__ 82 | else: 83 | param_type = str(param.annotation) 84 | else: 85 | param_type = None 86 | 87 | function_dict["function"]["parameters"]["properties"][name] = { 88 | "type":param_type, 89 | "description":param_description 90 | } 91 | 92 | # Check if parameter is required 93 | if param.default == inspect.Parameter.empty: 94 | function_dict["function"]["parameters"]["required"].append(name) 95 | 96 | return function_dict 97 | 98 | 99 | def make_query_tool(schema: dict) -> dict: 100 | """ 101 | Make an OpenAI tool for querying a database connection in MindsDB 102 | 103 | :param schema: database schema 104 | 105 | :return: dictionary containing function metadata for openai tools 106 | """ 107 | return { 108 | "type":"function", 109 | "function":{ 110 | "name":"query_database", 111 | "description":"Use this function to answer user questions. Input should be a fully formed SQL query.", 112 | "parameters":{ 113 | "type":"object", 114 | "properties":{ 115 | "query":{ 116 | "type":"string", 117 | "description":f""" 118 | SQL query extracting info to answer the user's question. 119 | SQL should be written using this database schema: 120 | {schema} 121 | The query should be returned in plain text, not in JSON. 
122 | """, 123 | } 124 | }, 125 | "required":["query"], 126 | }, 127 | } 128 | } 129 | 130 | 131 | def make_data_tool( 132 | model: str, 133 | data_source: str, 134 | description: str, 135 | connection_args: dict 136 | ): 137 | """ 138 | tool passing mindsdb database connection details for datasource to litellm callback 139 | 140 | :param model: model name for text to sql completion 141 | :param data_source: data source name 142 | :param description: description of the data source 143 | :param connection_args: connection arguments for the data source 144 | 145 | :return: dictionary containing function metadata for openai tools 146 | """ 147 | # Convert the connection_args dictionary to a JSON object 148 | connection_args_json = json.dumps(connection_args) 149 | 150 | tool_description = f""" 151 | Queries the provided data source about user data. When calling this function, ALWAYS use the following arguments: 152 | - model: {model} 153 | - connection_args: {connection_args_json} 154 | - data_source: {data_source} 155 | - description: {description} 156 | """ 157 | 158 | return { 159 | "type":"function", 160 | "function":{ 161 | "name":"get_mindsdb_text_to_sql_completion", 162 | "description":tool_description, 163 | "parameters":{ 164 | "type":"object", 165 | "properties":{ 166 | "model":{ 167 | "type":"string", 168 | "description":"llm model name to use for text to sql completion", 169 | }, 170 | "data_source":{ 171 | "type":"string", 172 | "description":"Data source name", 173 | }, 174 | "connection_args":{ 175 | "type":"string", 176 | "description":"Connection arguments for the data source", 177 | }, 178 | "description":{ 179 | "type":"string", 180 | "description":"Description of the data source", 181 | } 182 | }, 183 | "required": ['data_source', 'connection_args', 'model', 'description'] 184 | } 185 | } 186 | } 187 | 188 | 189 | def extract_sql_query(result: str) -> str: 190 | """ 191 | Extract the SQL query from an openai result string 192 | 193 | :param result: 
def extract_sql_query(result: str) -> str:
    """
    Pull the SQL statement out of an OpenAI result string.

    The query is expected after a "SQLQuery:" marker, possibly spanning
    several lines, and is terminated by an optional "SQLResult:" marker.

    :param result: OpenAI result string
    :return: SQL query string, or None when no query was found
    """
    query_parts = []
    capturing = False

    for line in result.split('\n'):
        if line.startswith("SQLQuery:"):
            # Marker found: start (or restart) collecting from this line.
            query_parts = [line[len("SQLQuery:"):].strip()]
            capturing = True
        elif line.startswith("SQLResult:"):
            # Result marker ends the query section.
            break
        elif capturing:
            # Continuation line of a multi-line query.
            query_parts.append(line.strip())

    query = " ".join(query_parts)

    # No marker (or only an empty query) yields None, matching callers' checks.
    if query == "":
        return None

    return query
import colored 259 | role_to_color = { 260 | "system":"red", 261 | "user":"green", 262 | "assistant":"blue", 263 | "function":"magenta", 264 | } 265 | 266 | for message in messages: 267 | if message["role"] == "system": 268 | logger.info(colored(f"system: {message['content']}\n", role_to_color[message["role"]])) 269 | elif message["role"] == "user": 270 | logger.info(colored(f"user: {message['content']}\n", role_to_color[message["role"]])) 271 | elif message["role"] == "assistant" and message.get("function_call"): 272 | logger.info(colored(f"assistant: {message['function_call']}\n", role_to_color[message["role"]])) 273 | elif message["role"] == "assistant" and not message.get("function_call"): 274 | logger.info(colored(f"assistant: {message['content']}\n", role_to_color[message["role"]])) 275 | elif message["role"] == "function": 276 | logger.info(colored(f"function ({message['name']}): {message['content']}\n", role_to_color[message["role"]])) -------------------------------------------------------------------------------- /mindsdb_sdk/utils/sql.py: -------------------------------------------------------------------------------- 1 | from mindsdb_sql_parser.ast import BinaryOperation, Identifier, Constant, Select, Star, NativeQuery 2 | from mindsdb_sdk.query import Query 3 | 4 | 5 | def dict_to_binary_op(filters): 6 | where = None 7 | for name, value in filters.items(): 8 | condition = BinaryOperation('=', args=[Identifier(name), Constant(value)]) 9 | 10 | where = add_condition(where, condition) 11 | 12 | return where 13 | 14 | 15 | def add_condition(where, condition): 16 | if where is None: 17 | return condition 18 | else: 19 | return BinaryOperation( 20 | 'and', 21 | args=[where, condition] 22 | ) 23 | 24 | 25 | def query_to_native_query(query: Query): 26 | return Select( 27 | targets=[Star()], 28 | from_table= NativeQuery( 29 | integration=Identifier(query.database), 30 | query=query.sql 31 | ) 32 | ) 
def get_dataframe_schema(df: pd.DataFrame):
    """
    Get the schema of a DataFrame.

    :param df: DataFrame

    :return: list of dictionaries containing column names and types

    :raises ValueError: if pandas fails to convert the frame's dtypes
    """
    try:
        # Use pandas' best-guess nullable dtypes so object columns get
        # meaningful type names (e.g. 'string', 'Int64') in the schema.
        df = df.convert_dtypes()
    except Exception as e:
        # BUG FIX: the original `raise f"Error converting dtypes: {e}"`
        # raised a plain string, which is itself a TypeError in Python 3.
        # Raise a proper exception and preserve the cause.
        raise ValueError(f"Error converting dtypes: {e}") from e

    dtypes = df.dtypes

    # Convert the dtypes Series into a list of dictionaries.
    schema = [{"name": column, "type": dtype.name} for column, dtype in dtypes.items()]

    return schema
import DropView 5 | from mindsdb_sql_parser.ast import Identifier 6 | 7 | from mindsdb_sdk.utils.objects_collection import CollectionBase 8 | 9 | from .query import Query 10 | from .tables import Table 11 | 12 | 13 | class View(Table): 14 | # The same as table 15 | pass 16 | 17 | 18 | # TODO getting view sql from api not implemented yet 19 | # class View(Table): 20 | # def __init__(self, api, data, project): 21 | # super().__init__(api, data['name'], project) 22 | # self.view_sql = data['sql'] 23 | # 24 | # def __repr__(self): 25 | # # 26 | # sql = self.view_sql.replace('\n', ' ') 27 | # if len(sql) > 40: 28 | # sql = sql[:37] + '...' 29 | # 30 | # return f'{self.__class__.__name__}({self.name}{self._filters_repr()}, sql={sql})' 31 | 32 | class Views(CollectionBase): 33 | """ 34 | **Views** 35 | 36 | Get: 37 | 38 | >>> views = views.list() 39 | >>> view = views[0] 40 | 41 | By name: 42 | 43 | >>> view = views.get('view1') 44 | 45 | Create: 46 | 47 | >>> view = views.create( 48 | ... 'view1', 49 | ... database='example_db', # optional, can also be database object 50 | ... query='select * from table1' 51 | ...) 52 | 53 | Create using query object: 54 | 55 | >>> view = views.create( 56 | ... 'view1', 57 | ... query=database.query('select * from table1') 58 | ...) 
59 | 60 | Getting data: 61 | 62 | >>> view = view.filter(a=1, b=2) 63 | >>> view = view.limit(100) 64 | >>> df = view.fetch() 65 | 66 | Drop view: 67 | 68 | >>> views.drop('view1') 69 | 70 | """ 71 | 72 | def __init__(self, project, api): 73 | self.project = project 74 | self.api = api 75 | 76 | 77 | # The same as table 78 | def _list_views(self): 79 | df = self.api.objects_tree(self.project.name) 80 | df = df[df.type == 'view'] 81 | 82 | return list(df['name']) 83 | 84 | def list(self) -> List[View]: 85 | """ 86 | Show list of views in project 87 | 88 | :return: list of View objects 89 | """ 90 | return [View(self.project, name) for name in self._list_views()] 91 | 92 | def create(self, name: str, sql: Union[str, Query], database: str = None) -> View: 93 | """ 94 | Create new view in project and return it 95 | 96 | :param name: name of the view 97 | :param sql: sql query as string or query object 98 | :param database: datasource of the view (where input sql will be executed) 99 | :return: View object 100 | """ 101 | if isinstance(sql, Query): 102 | database = sql.database 103 | sql = sql.sql 104 | elif not isinstance(sql, str): 105 | raise ValueError(sql) 106 | 107 | if database is not None: 108 | database = Identifier(database) 109 | ast_query = CreateView( 110 | name=Identifier(name), 111 | query_str=sql, 112 | from_table=database 113 | ) 114 | 115 | self.project.query(ast_query.to_string()).fetch() 116 | return View(self.project, name) 117 | 118 | def drop(self, name: str): 119 | """ 120 | Drop view from project 121 | 122 | :param name: name of the view 123 | """ 124 | 125 | ast_query = DropView(names=[Identifier(name)]) 126 | 127 | self.project.query(ast_query.to_string()).fetch() 128 | 129 | def get(self, name: str) -> View: 130 | """ 131 | Get view by name from project 132 | 133 | :param name: name of the view 134 | :return: View object 135 | """ 136 | 137 | if name not in self._list_views(): 138 | raise AttributeError("View doesn't exist") 139 | return 
View(self.project, name) 140 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | pandas >= 1.3.5 3 | mindsdb-sql-parser ~= 0.0.0 4 | docstring-parser >= 0.7.3 5 | tenacity >= 8.0.1 6 | openai >= 1.74.1 7 | sseclient-py >= 1.8.0 8 | validators == 0.20.0 9 | -------------------------------------------------------------------------------- /requirements_test.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | pytest-cov 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | about = {} 4 | with open("mindsdb_sdk/__about__.py") as fp: 5 | exec(fp.read(), about) 6 | 7 | with open("README.md", "r") as fh: 8 | long_description = fh.read() 9 | 10 | with open('requirements.txt') as req_file: 11 | requirements = req_file.read().splitlines() 12 | 13 | setup( 14 | name=about['__title__'], 15 | version=about['__version__'], 16 | url=about['__github__'], 17 | download_url=about['__pypi__'], 18 | license=about['__license__'], 19 | author=about['__author__'], 20 | author_email=about['__email__'], 21 | description=about['__description__'], 22 | long_description=long_description, 23 | long_description_content_type="text/markdown", 24 | packages=find_packages(exclude=('tests*',)), 25 | install_requires=requirements, 26 | extras_require={ 27 | 'dev': [ 28 | 'pytest', 29 | ] 30 | }, 31 | classifiers=[ 32 | "Programming Language :: Python :: 3", 33 | "License :: OSI Approved :: MIT License", 34 | "Operating System :: OS Independent", 35 | ], 36 | python_requires=">=3.6", 37 | ) 38 | -------------------------------------------------------------------------------- /tests/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindsdb/mindsdb_python_sdk/c8c51f2597f04d30e6d16b798a304153e7bd9233/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_agent_stream_process.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import logging 3 | 4 | from mindsdb_sdk.utils.agents import MindsDBSQLStreamParser 5 | 6 | @pytest.fixture 7 | def parser(): 8 | return MindsDBSQLStreamParser(log_level=logging.INFO) 9 | 10 | def test_initialization(parser): 11 | assert isinstance(parser, MindsDBSQLStreamParser) 12 | assert parser.logger.level == logging.INFO 13 | 14 | def test_stream_and_parse_sql_query_with_dict(parser): 15 | mock_stream = [ 16 | {'output': 'Test output', 'type': 'text'}, 17 | {'type': 'sql', 'content': 'SELECT * FROM table'}, 18 | {'output': 'More output'} 19 | ] 20 | 21 | generator = parser.stream_and_parse_sql_query(iter(mock_stream)) 22 | results = list(generator) 23 | 24 | assert len(results) == 3 25 | assert results[0] == {'output': 'Test output', 'sql_query': None} 26 | assert results[1] == {'output': '', 'sql_query': 'SELECT * FROM table'} 27 | assert results[2] == {'output': 'More output', 'sql_query': None} 28 | 29 | def test_stream_and_parse_sql_query_with_string(parser): 30 | mock_stream = ['String chunk 1', 'String chunk 2'] 31 | 32 | generator = parser.stream_and_parse_sql_query(iter(mock_stream)) 33 | results = list(generator) 34 | 35 | assert len(results) == 2 36 | assert results[0] == {'output': 'String chunk 1', 'sql_query': None} 37 | assert results[1] == {'output': 'String chunk 2', 'sql_query': None} 38 | 39 | 40 | def test_process_stream(parser, caplog): 41 | mock_stream = [ 42 | {'output':'First output'}, 43 | {'type':'sql', 'content':'SELECT * FROM users'}, 44 | {'output':'Second output'} 45 | ] 46 | 47 | with caplog.at_level(logging.INFO): 
48 | full_response, sql_query = parser.process_stream(iter(mock_stream)) 49 | 50 | assert full_response == 'First outputSecond output' 51 | assert sql_query == 'SELECT * FROM users' 52 | 53 | # Check for specific log messages 54 | assert 'Starting to process completion stream...' in caplog.text 55 | assert 'Output: First output' in caplog.text 56 | assert 'Extracted SQL Query: SELECT * FROM users' in caplog.text 57 | assert 'Output: Second output' in caplog.text 58 | assert f'Full Response: {full_response}' in caplog.text 59 | assert f'Final SQL Query: {sql_query}' in caplog.text 60 | 61 | def test_process_stream_no_sql(parser): 62 | mock_stream = [ 63 | {'output': 'First output'}, 64 | {'output': 'Second output'} 65 | ] 66 | 67 | full_response, sql_query = parser.process_stream(iter(mock_stream)) 68 | 69 | assert full_response == 'First outputSecond output' 70 | assert sql_query is None 71 | -------------------------------------------------------------------------------- /tests/test_openai.py: -------------------------------------------------------------------------------- 1 | import json 2 | from unittest.mock import patch, MagicMock 3 | from mindsdb_sdk.utils import openai 4 | 5 | 6 | def test_chat_completion_request_success(): 7 | mock_client = MagicMock() 8 | mock_client.chat.completions.create.return_value = "Test Response" 9 | response = openai.chat_completion_request(mock_client, "text-davinci-002", [{"role": "system", "content": "You are a helpful assistant."}]) 10 | assert response == "Test Response" 11 | 12 | 13 | def test_make_openai_tool(): 14 | def test_func(a: int, b: str) -> str: 15 | """This is a test function""" 16 | return b * a 17 | tool = openai.make_openai_tool(test_func) 18 | assert tool["function"]["name"] == "test_func" 19 | assert tool["function"]["description"] == "This is a test function" 20 | assert tool["function"]["parameters"]["properties"]["a"]["type"] == "int" 21 | assert tool["function"]["parameters"]["properties"]["b"]["type"] == 
"str" 22 | 23 | 24 | def test_extract_sql_query(): 25 | result = "SQLQuery: SELECT * FROM test_table\nSQLResult: [{'column1': 'value1', 'column2': 'value2'}]" 26 | query = openai.extract_sql_query(result) 27 | assert query == "SELECT * FROM test_table" 28 | 29 | 30 | def test_extract_sql_query_no_query(): 31 | result = "SQLResult: [{'column1': 'value1', 'column2': 'value2'}]" 32 | query = openai.extract_sql_query(result) 33 | assert query is None 34 | 35 | 36 | @patch("mindsdb_sdk.utils.openai.query_database") 37 | def test_execute_function_call_query_database(mock_query_database): 38 | mock_query_database.return_value = "Test Result" 39 | mock_message = MagicMock() 40 | mock_message.tool_calls[0].function.name = "query_database" 41 | mock_message.tool_calls[0].function.arguments = json.dumps({"query": "SELECT * FROM test_table"}) 42 | result = openai.execute_function_call(mock_message, MagicMock()) 43 | assert result == "Test Result" 44 | 45 | 46 | def test_execute_function_call_no_function(): 47 | mock_message = MagicMock() 48 | mock_message.tool_calls[0].function.name = "non_existent_function" 49 | result = openai.execute_function_call(mock_message, MagicMock()) 50 | assert result == "Error: function non_existent_function does not exist" 51 | --------------------------------------------------------------------------------