├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature-mindsdb-request.md ├── PULL_REQUEST_TEMPLATE │ └── pull_request_template.md └── workflows │ ├── add_to_pr_review.yml │ ├── add_to_roadmap_project_v2.yml │ ├── cla.yml │ ├── docs.yml │ ├── release.yml │ └── test_prs.yml ├── .gitignore ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── MANIFEST.in ├── README.md ├── assets └── contributions-agreement │ └── cla.json ├── docs ├── Makefile ├── make.bat ├── requirements.txt └── source │ ├── agents.rst │ ├── conf.py │ ├── connection.rst │ ├── database.rst │ ├── handlers.rst │ ├── index.rst │ ├── jobs.rst │ ├── knowledge_bases.rst │ ├── ml_engines.rst │ ├── model.rst │ ├── project.rst │ ├── query.rst │ ├── server.rst │ ├── skills.rst │ ├── tables.rst │ └── views.rst ├── examples ├── data │ └── tokaido-rulebook.pdf ├── home_rentals.py ├── using_agents.py ├── using_agents_with_retrieval.py ├── using_agents_with_streaming_with_retrieval.py ├── using_agents_with_text2sql.py ├── using_agents_with_text2sql_streaming.py ├── using_database_mind_text2sql.py ├── using_openai.py └── working_with_tables.py ├── mindsdb_sdk ├── __about__.py ├── __init__.py ├── agents.py ├── connect.py ├── connectors │ ├── __init__.py │ └── rest_api.py ├── databases.py ├── handlers.py ├── jobs.py ├── knowledge_bases.py ├── ml_engines.py ├── models.py ├── projects.py ├── query.py ├── server.py ├── skills.py ├── tables.py ├── utils │ ├── __init__.py │ ├── agents.py │ ├── context.py │ ├── mind.py │ ├── objects_collection.py │ ├── openai.py │ ├── sql.py │ └── table_schema.py └── views.py ├── requirements.txt ├── requirements_test.txt ├── setup.py └── tests ├── __init__.py ├── test_agent_stream_process.py ├── test_openai.py └── test_sdk.py /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | labels: 5 | --- 6 | 7 | **Your Environment** 8 | 9 | * 
Python version: 10 | * Operating system: 11 | * Mindsdb Python SDK version: 12 | * Additional info if applicable: 13 | 14 | **Please describe your issue and how we can replicate it** 15 | 16 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature-mindsdb-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature Mindsdb Python SDK request 3 | about: Suggest an idea for this project 4 | labels: 5 | --- 6 | 7 | **Is your feature request related to a problem? Please describe.** 8 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 9 | 10 | **Describe the solution you'd like** 11 | A clear and concise description of what you want to happen. 12 | 13 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE/pull_request_template.md: -------------------------------------------------------------------------------- 1 | Fixes # 2 | 3 | ## Please describe what changes you made in as much detail as possible 4 | - 5 | 6 | -------------------------------------------------------------------------------- /.github/workflows/add_to_pr_review.yml: -------------------------------------------------------------------------------- 1 | name: Add Pull Requests to PR review project 2 | 3 | on: 4 | pull_request: 5 | types: 6 | - opened 7 | 8 | jobs: 9 | add-to-project: 10 | name: Add issue to project 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/add-to-project@v0.5.0 14 | with: 15 | project-url: https://github.com/orgs/mindsdb/projects/65 16 | github-token: ${{ secrets.ADD_TO_PROJECT_PAT }} 17 | -------------------------------------------------------------------------------- /.github/workflows/add_to_roadmap_project_v2.yml: -------------------------------------------------------------------------------- 1 | name: Add issue to roadmap project 2 | on: 3 | issues: 4 
| types: 5 | - opened 6 | jobs: 7 | add-to-project: 8 | name: Add issue to roadmap project 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: actions/add-to-project@v0.4.0 12 | with: 13 | project-url: https://github.com/orgs/mindsdb/projects/53 14 | github-token: ${{ secrets.ADD_TO_PROJECT_PAT }} -------------------------------------------------------------------------------- /.github/workflows/cla.yml: -------------------------------------------------------------------------------- 1 | name: "MindsDB CLA Assistant" 2 | on: 3 | issue_comment: 4 | types: [created] 5 | pull_request_target: 6 | types: [opened,closed,synchronize] 7 | 8 | permissions: 9 | actions: write 10 | contents: write 11 | pull-requests: write 12 | statuses: write 13 | 14 | jobs: 15 | CLAssistant: 16 | runs-on: mdb-dev 17 | steps: 18 | - name: "CLA Assistant" 19 | if: (github.event.comment.body == 'recheck' || github.event.comment.body == 'I have read the CLA Document and I hereby sign the CLA') || github.event_name == 'pull_request_target' 20 | uses: contributor-assistant/github-action@v2.6.1 21 | env: 22 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 23 | with: 24 | path-to-signatures: 'assets/contributions-agreement/cla.json' 25 | path-to-document: 'https://github.com/mindsdb/mindsdb/blob/main/assets/contributions-agreement/individual-contributor.md' 26 | branch: 'cla' 27 | allowlist: bot*, ZoranPandovski, torrmal, Stpmax, mindsdbadmin, ea-rus, tmichaeldb, dusvyat, hamishfagg, MinuraPunchihewa, martyna-mindsdb, lucas-koontz 28 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: "Pull Request Docs Check" 2 | 3 | on: 4 | push: 5 | branches: 6 | - docs 7 | 8 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 9 | permissions: 10 | contents: read 11 | pages: write 12 | id-token: write 13 | 14 | # Allow only one concurrent 
deployment, skipping runs queued between the run in-progress and latest queued. 15 | # However, do NOT cancel in-progress runs as we want to allow these production deployments to complete. 16 | concurrency: 17 | group: "pages" 18 | cancel-in-progress: false 19 | 20 | jobs: 21 | docs: 22 | runs-on: ubuntu-latest 23 | steps: 24 | - uses: actions/checkout@v1 25 | - uses: ammaraskar/sphinx-action@master 26 | with: 27 | docs-folder: "docs/" 28 | - name: Setup Pages 29 | uses: actions/configure-pages@v3 30 | - name: Upload artifact 31 | uses: actions/upload-pages-artifact@v1 32 | with: 33 | path: 'docs/build/html' 34 | - name: Deploy to GitHub Pages 35 | id: deployment 36 | uses: actions/deploy-pages@v2 37 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | test: 9 | runs-on: ${{ matrix.os }} 10 | strategy: 11 | matrix: 12 | os: [ubuntu-latest] 13 | python-version: ['3.8', '3.9', '3.10', '3.11'] 14 | steps: 15 | - uses: actions/checkout@v2 16 | - name: Set up Python ${{ matrix.python-version }} 17 | uses: actions/setup-python@v2 18 | with: 19 | python-version: ${{ matrix.python-version }} 20 | - name: Install dependencies 21 | run: | 22 | python -m pip install --upgrade pip==22.0.4 23 | pip install -r requirements.txt 24 | pip install -r requirements_test.txt 25 | pip install --no-cache-dir . 
26 | - name: Run tests 27 | run: | 28 | if [ "$RUNNER_OS" == "Linux" ]; then 29 | 30 | env PYTHONPATH=./ pytest tests/ 31 | 32 | fi 33 | shell: bash 34 | 35 | 36 | deploy: 37 | runs-on: ubuntu-latest 38 | needs: test 39 | steps: 40 | - uses: actions/checkout@v2 41 | - name: Set up Python 42 | uses: actions/setup-python@v2 43 | with: 44 | python-version: '3.9' 45 | - name: Install dependencies 46 | run: | 47 | python -m pip install --upgrade pip==20.2.4 48 | pip install setuptools wheel twine 49 | - name: Build and publish 50 | env: 51 | TWINE_USERNAME: __token__ 52 | TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 53 | run: | 54 | python setup.py sdist 55 | twine upload dist/* 56 | -------------------------------------------------------------------------------- /.github/workflows/test_prs.yml: -------------------------------------------------------------------------------- 1 | name: PR workflow 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | 8 | jobs: 9 | test: 10 | runs-on: ${{ matrix.os }} 11 | strategy: 12 | matrix: 13 | os: [ubuntu-latest] 14 | python-version: ['3.8', '3.9', '3.10', '3.11'] 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Set up Python ${{ matrix.python-version }} 18 | uses: actions/setup-python@v2 19 | with: 20 | python-version: ${{ matrix.python-version }} 21 | - name: Install dependencies 22 | run: | 23 | python -m pip install --upgrade pip==22.0.4 24 | pip install -r requirements.txt 25 | pip install -r requirements_test.txt 26 | pip install --no-cache-dir . 
27 | - name: Run tests 28 | run: | 29 | if [ "$RUNNER_OS" == "Linux" ]; then 30 | 31 | env PYTHONPATH=./ pytest tests/ 32 | 33 | fi 34 | shell: bash 35 | 36 | coverage: 37 | needs: test 38 | if: github.ref != 'refs/heads/stable' 39 | runs-on: ubuntu-latest 40 | permissions: 41 | pull-requests: write 42 | steps: 43 | - uses: actions/checkout@v3 44 | - name: Set up Python 3.8 45 | uses: actions/setup-python@v2 46 | with: 47 | python-version: 3.8 48 | 49 | - name: Install dependencies 50 | run: | 51 | python -m pip install --upgrade pip 52 | pip install flake8 53 | pip install -r requirements.txt 54 | pip install -r requirements_test.txt 55 | 56 | - name: Build coverage file 57 | run: | 58 | pytest --junitxml=pytest.xml --cov-report=term-missing:skip-covered --cov=mindsdb_sdk tests/ | tee pytest-coverage.txt 59 | 60 | - name: Pytest coverage comment 61 | uses: MishaKav/pytest-coverage-comment@main 62 | with: 63 | pytest-coverage-path: ./pytest-coverage.txt 64 | junitxml-path: ./pytest.xml 65 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # Distribution / packaging 7 | .Python 8 | env/ 9 | build/ 10 | develop-eggs/ 11 | dist/ 12 | downloads/ 13 | eggs/ 14 | .eggs/ 15 | lib/ 16 | lib64/ 17 | parts/ 18 | sdist/ 19 | var/ 20 | wheels/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | share/python-wheels/ 25 | 26 | # visual studio code 27 | .DStore 28 | .DS_Store 29 | .idea 30 | .vscode 31 | 32 | # virtualenv 33 | .venv 34 | venv/ 35 | ENV/ 36 | 37 | # pyenv 38 | .python-version 39 | 40 | # Installer logs 41 | pip-log.txt 42 | pip-delete-this-directory.txt 43 | results 44 | mlp_img 45 | 46 | tests/home_rentals.csv 47 | tests/credentials.txt 48 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: 
-------------------------------------------------------------------------------- 1 | 2 | # Contributor Covenant Code of Conduct 3 | 4 | ## Our Pledge 5 | 6 | We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. 7 | 8 | We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community. 9 | 10 | ## Our Standards 11 | 12 | Examples of behavior that contributes to a positive environment for our community include: 13 | 14 | * Demonstrating empathy and kindness toward other people 15 | * Being respectful of differing opinions, viewpoints, and experiences 16 | * Giving and gracefully accepting constructive feedback 17 | * Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience 18 | * Focusing on what is best not just for us as individuals, but for the overall community 19 | 20 | Examples of unacceptable behavior include: 21 | 22 | * The use of sexualized language or imagery, and sexual attention or 23 | advances of any kind 24 | * Trolling, insulting or derogatory comments, and personal or political attacks 25 | * Public or private harassment 26 | * Publishing others' private information, such as a physical or email 27 | address, without their explicit permission 28 | * Other conduct which could reasonably be considered inappropriate in a 29 | professional setting 30 | 31 | ## Enforcement Responsibilities 32 | 33 | Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, 
threatening, offensive, or harmful. 34 | 35 | Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate. 36 | 37 | ## Scope 38 | 39 | This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. 40 | 41 | ## Enforcement 42 | 43 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at admin@mindsdb.com. All complaints will be reviewed and investigated promptly and fairly. 44 | 45 | All community leaders are obligated to respect the privacy and security of the reporter of any incident. 46 | 47 | 48 | ## Attribution 49 | 50 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 2.0, 51 | available at https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 52 | 53 | Community Impact Guidelines were inspired by [Mozilla's code of conduct enforcement ladder](https://github.com/mozilla/diversity). 54 | 55 | [homepage]: https://www.contributor-covenant.org 56 | 57 | For answers to common questions about this code of conduct, see the FAQ at 58 | https://www.contributor-covenant.org/faq. Translations are available at https://www.contributor-covenant.org/translations. 
59 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Mindsdb 2 | 3 | Being part of the core MindsDB team is accessible to anyone who is motivated and wants to be part of that journey! 4 | 5 | Please see below how to contribute to the project, also refer to the contributing documentation. 6 | 7 | ## How can you help us? 8 | 9 | * Report a bug 10 | * Improve documentation 11 | * Discuss the code implementation 12 | * Submit a bug fix 13 | * Propose new features 14 | * Test Mindsdb 15 | 16 | ## Code contributions 17 | 18 | In general, we follow the "fork-and-pull" Git workflow. 19 | 1. Fork the Mindsdb repository 20 | 2. Clone the repository 21 | 3. Make changes and commit them 22 | 4. Push your local branch to your fork 23 | 5. Submit a Pull request so that we can review your changes 24 | 6. Write a commit message 25 | 7. Make sure that the CI tests are GREEN 26 | 27 | > NOTE: Be sure to merge the latest from "upstream" before making a pull request! Also, make the PR to the staging branch. 28 | 29 | ## Feature and Bug reports 30 | We use GitHub issues to track bugs and features. Report them by opening a [new issue](https://github.com/mindsdb/mindsdb_python_sdk/issues/new/choose) and fill out all of the required inputs. 31 | 32 | ## Code review process 33 | 34 | The Pull Request reviews are done on a regular basis. Please, make sure you respond to our feedback/questions. 35 | 36 | ## Community 37 | 38 | If you have additional questions or you want to chat with the MindsDB core team, please join our [Slack community](https://mindsdb.com/joincommunity) or post at [Github Discussions](https://github.com/mindsdb/mindsdb_python_sdk/discussions). 
39 | 40 | To get updates on MindsDB’s latest announcements, releases, and events, sign up for our [Monthly Community Newsletter](https://mindsdb.com/newsletter/?utm_medium=community&utm_source=github&utm_campaign=mindsdb%20repo). 41 | 42 | Join our mission of democratizing machine learning! 43 | 44 | ## Contributor Code of Conduct 45 | 46 | Please note that this project is released with a [Contributor Code of Conduct](https://github.com/mindsdb/mindsdb_python_sdk/blob/stable/CODE_OF_CONDUCT.md). By participating in this project, you agree to abide by its terms. 47 | 48 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 MindsDB Inc 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include requirements.txt 2 | prune tests* 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python MindsDB SDK 2 | 3 | The Python MindsDB SDK allows you to connect to a MindsDB server from Python using the HTTP API. 4 | 5 | ## Installation 6 | 7 | ``` 8 | pip install mindsdb_sdk 9 | ``` 10 | 11 | ## Example 12 | 13 | ### Connecting to the MindsDB server 14 | 15 | You can establish a connection to the MindsDB server using the SDK. Here are some examples: 16 | 17 | #### Connect to a local MindsDB server 18 | 19 | ```python 20 | import mindsdb_sdk 21 | con = mindsdb_sdk.connect() 22 | con = mindsdb_sdk.connect('http://127.0.0.1:47334') 23 | ``` 24 | 25 | #### Connect to the MindsDB Cloud 26 | 27 | ```python 28 | import mindsdb_sdk 29 | con = mindsdb_sdk.connect(login='a@b.com', password='-') 30 | con = mindsdb_sdk.connect('https://cloud.mindsdb.com', login='a@b.com', password='-') 31 | ``` 32 | 33 | #### Connect to a MindsDB Pro server 34 | 35 | ```python 36 | import mindsdb_sdk 37 | con = mindsdb_sdk.connect('http://', login='a@b.com', password='-', is_managed=True) 38 | ``` 39 | 40 | ## Basic usage 41 | 42 | Once connected to the server, you can perform various operations. 
Here are some examples: 43 | 44 | ```python 45 | # Get a list of databases 46 | databases = con.databases.list() 47 | 48 | # Get a specific database 49 | database = databases[0] # Database type object 50 | 51 | # Perform an SQL query 52 | query = database.query('select * from table1') 53 | print(query.fetch()) 54 | 55 | # Create a table 56 | table = database.tables.create('table2', query) 57 | 58 | # Get a project 59 | project = con.projects.proj 60 | 61 | # or use mindsdb project 62 | project = con 63 | 64 | # Perform an SQL query within a project 65 | query = project.query('select * from database.table join model1') 66 | 67 | # Create a view 68 | view = project.views.create('view1', query=query) 69 | 70 | # Get a list of views 71 | views = project.views.list() 72 | view = views[0] 73 | df = view.fetch() 74 | 75 | # Get a list of models 76 | models = project.models.list() 77 | model = models[0] 78 | 79 | # Use a model for prediction 80 | result_df = model.predict(df) 81 | result_df = model.predict(query) 82 | 83 | # Create a model 84 | timeseries_options = { 85 | 'order': 'date', 86 | 'window': 5, 87 | 'horizon': 1 88 | } 89 | model = project.models.create( 90 | 'rentals_model', 91 | predict='price', 92 | query=query, 93 | timeseries_options=timeseries_options 94 | ) 95 | 96 | # Describe a model 97 | model.describe() 98 | ``` 99 | 100 | You can find more examples in this [Google colab notebook]( 101 | https://colab.research.google.com/drive/1QouwAR3saFb9ffthrIs1LSH5COzyQa11#scrollTo=k6IbwsKRPQCR 102 | ) 103 | 104 | ## Examples 105 | 106 | https://github.com/mindsdb/mindsdb_python_sdk/tree/staging/examples 107 | 108 | ## API Documentation 109 | 110 | The API documentation for the MindsDB SDK can be found at https://mindsdb.github.io/mindsdb_python_sdk/. 
111 | 112 | ### Generating API docs locally: 113 | 114 | ```commandline 115 | cd docs 116 | pip install -r requirements.txt 117 | make html 118 | ``` 119 | 120 | The online documentation is automatically updated by pushing changes to the docs branch. 121 | 122 | 123 | ## Testing 124 | 125 | To run all the tests for the components, use the following command: 126 | 127 | ```bash 128 | env PYTHONPATH=./ pytest 129 | ``` 130 | 131 | ## Contributing 132 | 133 | We welcome contributions to the MindsDB SDK. If you'd like to contribute, please refer to the contribution guidelines for more information. 134 | 135 | ## License 136 | 137 | The MindsDB SDK is licensed under the MIT License. Feel free to use and modify it according to your needs 138 | 139 | -------------------------------------------------------------------------------- /assets/contributions-agreement/cla.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindsdb/mindsdb_python_sdk/c8c51f2597f04d30e6d16b798a304153e7bd9233/assets/contributions-agreement/cla.json -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | pandas >= 1.3.5 3 | mindsdb-sql >= 0.7.0, < 0.8.0 4 | 5 | sphinx 6 | sphinx-rtd-theme 7 | -------------------------------------------------------------------------------- /docs/source/agents.rst: -------------------------------------------------------------------------------- 1 | Agents 2 | ------------------------- 3 | 4 | .. 
automodule:: mindsdb_sdk.agents 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # For the full list of built-in configuration values, see the documentation: 4 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 5 | 6 | # -- Project information ----------------------------------------------------- 7 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information 8 | import os 9 | import sys 10 | sys.path.insert(0, os.path.abspath(os.path.join('..', '..'))) 11 | 12 | project = 'Mindsdb python SDK' 13 | copyright = '2023, MindsDB Inc' 14 | author = 'MindsDB Inc' 15 | 16 | # -- General configuration --------------------------------------------------- 17 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration 18 | 19 | extensions = [ 20 | 'sphinx.ext.autodoc', 21 | 'sphinx.ext.napoleon', 22 | 'sphinx.ext.autosectionlabel' 23 | ] 24 | 25 | templates_path = ['_templates'] 26 | exclude_patterns = [] 27 | 28 | 29 | 30 | # -- Options for HTML output ------------------------------------------------- 31 | # https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output 32 | 33 | html_theme = 'sphinx_rtd_theme' 34 | html_static_path = ['_static'] 35 | -------------------------------------------------------------------------------- /docs/source/connection.rst: -------------------------------------------------------------------------------- 1 | Connection to mindsdb server 2 | ---------------------------- 3 | 4 | .. 
automodule:: mindsdb_sdk.connect 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/database.rst: -------------------------------------------------------------------------------- 1 | Databases 2 | ---------------------------- 3 | 4 | .. automodule:: mindsdb_sdk.databases 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: -------------------------------------------------------------------------------- /docs/source/handlers.rst: -------------------------------------------------------------------------------- 1 | Handlers 2 | ------------------------- 3 | 4 | .. automodule:: mindsdb_sdk.handlers 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | Welcome to Mindsdb python SDK documentation! 2 | ============================================ 3 | 4 | Getting Started 5 | =============== 6 | 7 | Source code 8 | ----------- 9 | 10 | ``_ 11 | 12 | Installation 13 | ------------ 14 | 15 | .. code-block:: console 16 | 17 | pip install mindsdb_sdk 18 | 19 | Connect 20 | ------- 21 | 22 | .. code-block:: python 23 | 24 | import mindsdb_sdk 25 | 26 | # Connect to local server 27 | 28 | server = mindsdb_sdk.connect() 29 | server = mindsdb_sdk.connect('http://127.0.0.1:47334') 30 | 31 | # Connect to cloud server 32 | 33 | server = mindsdb_sdk.connect(email='a@b.com', password='-') 34 | server = mindsdb_sdk.connect('https://cloud.mindsdb.com', login='a@b.com', password='-') 35 | 36 | # Connect to MindsDB Pro 37 | 38 | server = mindsdb_sdk.connect('http://', login='a@b.com', password='-', is_managed=True) 39 | 40 | Base usage 41 | ---------- 42 | 43 | .. 
code-block:: python 44 | 45 | # database 46 | databases = server.list_databases() 47 | 48 | database = databases[0] # Database type object 49 | 50 | # sql query 51 | query = database.query('select * from table1') 52 | print(query.fetch()) 53 | 54 | # create table 55 | table = database.create_table('table2', query) 56 | 57 | 58 | # project 59 | project = server.get_project('proj') 60 | 61 | # sql query 62 | query = project.query('select * from database.table join model1') 63 | 64 | # create view 65 | view = project.create_view( 66 | 'view1', 67 | query=query 68 | ) 69 | 70 | # get view 71 | views = project.list_views() 72 | view = views[0] 73 | df = view.fetch() 74 | 75 | # get model 76 | models = project.list_models() 77 | model = models[0] 78 | 79 | # using model 80 | result_df = model.predict(df) 81 | result_df = model.predict(query) 82 | 83 | # create model 84 | model = project.create_model( 85 | 'rentals_model', 86 | predict='price', 87 | query=query, 88 | ) 89 | 90 | More 91 | 92 | More examples 93 | ------------- 94 | 95 | ``_ 96 | 97 | API documentation 98 | ================= 99 | 100 | .. toctree:: 101 | :maxdepth: 1 102 | :caption: Connection: 103 | 104 | connection 105 | 106 | .. toctree:: 107 | :maxdepth: 1 108 | :caption: Modules: 109 | 110 | server 111 | database 112 | 113 | project 114 | handlers 115 | 116 | ml_engines 117 | model 118 | tables 119 | views 120 | query 121 | jobs 122 | 123 | knowledge_bases 124 | skills 125 | agents 126 | 127 | 128 | Indices and tables 129 | ------------------ 130 | 131 | * :ref:`genindex` 132 | * :ref:`modindex` 133 | * :ref:`search` -------------------------------------------------------------------------------- /docs/source/jobs.rst: -------------------------------------------------------------------------------- 1 | Jobs 2 | ------------------------- 3 | 4 | .. _my-reference-label: 5 | 6 | 7 | .. 
automodule:: mindsdb_sdk.jobs 8 | :members: 9 | :undoc-members: 10 | :show-inheritance: -------------------------------------------------------------------------------- /docs/source/knowledge_bases.rst: -------------------------------------------------------------------------------- 1 | Knowledge bases 2 | ------------------------- 3 | 4 | .. automodule:: mindsdb_sdk.knowledge_bases 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: -------------------------------------------------------------------------------- /docs/source/ml_engines.rst: -------------------------------------------------------------------------------- 1 | ML Engines 2 | ------------------------- 3 | 4 | .. automodule:: mindsdb_sdk.ml_engines 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: -------------------------------------------------------------------------------- /docs/source/model.rst: -------------------------------------------------------------------------------- 1 | Models 2 | ------------------------- 3 | 4 | .. automodule:: mindsdb_sdk.models 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: -------------------------------------------------------------------------------- /docs/source/project.rst: -------------------------------------------------------------------------------- 1 | Projects 2 | --------------------------- 3 | 4 | .. automodule:: mindsdb_sdk.projects 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/query.rst: -------------------------------------------------------------------------------- 1 | Query 2 | ------------------------- 3 | 4 | .. 
automodule:: mindsdb_sdk.query 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | 9 | -------------------------------------------------------------------------------- /docs/source/server.rst: -------------------------------------------------------------------------------- 1 | Server 2 | -------------------------- 3 | 4 | .. automodule:: mindsdb_sdk.server 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: 8 | -------------------------------------------------------------------------------- /docs/source/skills.rst: -------------------------------------------------------------------------------- 1 | Skills 2 | ------------------------- 3 | 4 | .. automodule:: mindsdb_sdk.skills 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: -------------------------------------------------------------------------------- /docs/source/tables.rst: -------------------------------------------------------------------------------- 1 | Tables 2 | ------------------------- 3 | 4 | .. automodule:: mindsdb_sdk.tables 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: -------------------------------------------------------------------------------- /docs/source/views.rst: -------------------------------------------------------------------------------- 1 | Views 2 | ------------------------- 3 | 4 | .. 
automodule:: mindsdb_sdk.views 5 | :members: 6 | :undoc-members: 7 | :show-inheritance: -------------------------------------------------------------------------------- /examples/data/tokaido-rulebook.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindsdb/mindsdb_python_sdk/c8c51f2597f04d30e6d16b798a304153e7bd9233/examples/data/tokaido-rulebook.pdf -------------------------------------------------------------------------------- /examples/home_rentals.py: -------------------------------------------------------------------------------- 1 | 2 | import mindsdb_sdk 3 | 4 | con = mindsdb_sdk.connect() 5 | 6 | # connect to database 7 | db = con.databases.create( 8 | 'example_db', 9 | engine='postgres', 10 | connection_args={ 11 | "user": "demo_user", 12 | "password": "demo_password", 13 | "host": "3.220.66.106", 14 | "port": "5432", 15 | "database": "demo" 16 | } 17 | ) 18 | 19 | # get table 20 | # because table with schema we are using .get 21 | tbl = db.tables.get('demo_data.home_rentals') 22 | 23 | # create model 24 | model = con.models.create( 25 | 'home_rentals_model', 26 | predict='rental_price', 27 | query=tbl 28 | ) 29 | 30 | # wait till training complete 31 | model.wait_complete() 32 | 33 | # make prediction for first 3 rows 34 | result = model.predict(tbl.limit(3)) 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /examples/using_agents.py: -------------------------------------------------------------------------------- 1 | import mindsdb_sdk 2 | 3 | con = mindsdb_sdk.connect() 4 | 5 | # We currently support Langchain as a backend. 6 | print('Creating underlying langchain model for the agent to use...') 7 | try: 8 | langchain_engine = con.ml_engines.get('langchain') 9 | except Exception: 10 | # Create the engine if it doesn't exist. 
11 | langchain_engine = con.ml_engines.create('langchain', handler='langchain') 12 | 13 | # Actually create the underlying model the agent will use. 14 | langchain_model = con.models.create( 15 | 'agent_model', 16 | predict='answer', 17 | mode='retrieval', # Use retrieval mode if using knowledge bases. 18 | engine='langchain', 19 | prompt_template='You are a spicy, cheeky assistant. Add some personality and flare when responding to the user question: {{question}}', 20 | model_name='gpt-4-0125-preview' # This is the underlying LLM. Can use OpenAI, Claude, local Ollama, etc 21 | # Can optionally set LLM args here. For example: 22 | # temperature=0.0, 23 | # max_tokens=1000, 24 | # top_p=1.0, 25 | # top_k=0, 26 | # ... 27 | ) 28 | print('Agent ready to use.') 29 | 30 | # Now create an agent that will use the model we just created. 31 | agent = con.agents.create('new_agent', langchain_model) 32 | print('Ask a question: ') 33 | question = input() 34 | answer = agent.completion([{'question': question, 'answer': None}]) 35 | print(answer.content) 36 | -------------------------------------------------------------------------------- /examples/using_agents_with_retrieval.py: -------------------------------------------------------------------------------- 1 | import mindsdb_sdk 2 | from uuid import uuid4 3 | import os 4 | 5 | con = mindsdb_sdk.connect() 6 | 7 | open_ai_key = os.getenv('OPENAI_API_KEY') 8 | model_name = 'gpt-4o' 9 | 10 | # Now create an agent that will use the model we just created. 11 | agent = con.agents.create(name=f'mindsdb_retrieval_agent_{model_name}_{uuid4().hex}', 12 | model=model_name, 13 | params={'return_context': True}) 14 | 15 | agent.add_file('./data/tokaido-rulebook.pdf', 'rule book for the board game Tokaido') 16 | 17 | question = "what are the rules for the game takaido?" 
18 | answer = agent.completion([{'question': question, 'answer': None}]) 19 | print(answer.context) 20 | print(answer) 21 | 22 | -------------------------------------------------------------------------------- /examples/using_agents_with_streaming_with_retrieval.py: -------------------------------------------------------------------------------- 1 | import mindsdb_sdk 2 | from uuid import uuid4 3 | import os 4 | 5 | con = mindsdb_sdk.connect() 6 | 7 | open_ai_key = os.getenv('OPENAI_API_KEY') 8 | model_name = 'gpt-4o' 9 | 10 | # Now create an agent that will use the model we just created. 11 | agent = con.agents.create(name=f'mindsdb_retrieval_agent_{model_name}_{uuid4().hex}', 12 | model=model_name, 13 | params={'return_context': True}) 14 | 15 | agent.add_file('./data/tokaido-rulebook.pdf', 'rule book for the board game Tokaido') 16 | 17 | question = "what are the rules for the game tokaido?" 18 | 19 | # Stream the completion 20 | completion_stream = agent.completion_stream([{'question': question, 'answer': None}]) 21 | 22 | # Process the streaming response 23 | full_response = "" 24 | for chunk in completion_stream: 25 | print(chunk) # Print the entire chunk for debugging 26 | if isinstance(chunk, dict): 27 | if 'output' in chunk: 28 | full_response += chunk['output'] 29 | elif isinstance(chunk, str): 30 | full_response += chunk 31 | 32 | print("\n\nFull response:") 33 | print(full_response) 34 | -------------------------------------------------------------------------------- /examples/using_agents_with_text2sql.py: -------------------------------------------------------------------------------- 1 | import mindsdb_sdk 2 | from uuid import uuid4 3 | import os 4 | 5 | con = mindsdb_sdk.connect() 6 | 7 | open_ai_key = os.getenv('OPENAI_API_KEY') 8 | model_name = 'gpt-4o' 9 | 10 | # Now create an agent that will use the model we just created. 
11 | agent = con.agents.create(name=f'mindsdb_sql_agent_{model_name}_{uuid4().hex}', 12 | model=model_name) 13 | 14 | 15 | # Set up a Postgres data source with our new agent. 16 | data_source = 'postgres' 17 | connection_args = { 18 | "user": "demo_user", 19 | "password": "demo_password", 20 | "host": "samples.mindsdb.com", 21 | "port": "5432", 22 | "database": "demo", 23 | "schema": "demo_data" 24 | } 25 | description = 'mindsdb demo database' 26 | database = con.databases.create( 27 | f'mindsdb_sql_agent_datasource_{uuid4().hex}', 28 | data_source, 29 | connection_args 30 | ) 31 | 32 | # Actually connect the agent to the datasource. 33 | agent.add_database(database.name, [], description) 34 | 35 | 36 | question = 'How many three-bedroom houses were sold in 2008?' 37 | answer = agent.completion([{'question': question, 'answer': None}]) 38 | print(answer.content) 39 | 40 | con.databases.drop(database.name) 41 | con.agents.drop(agent.name) 42 | -------------------------------------------------------------------------------- /examples/using_agents_with_text2sql_streaming.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import mindsdb_sdk 4 | from uuid import uuid4 5 | import os 6 | 7 | from mindsdb_sdk.utils.agents import MindsDBSQLStreamParser 8 | 9 | con = mindsdb_sdk.connect() 10 | 11 | open_ai_key = os.getenv('OPENAI_API_KEY') 12 | model_name = 'gpt-4o' 13 | 14 | # Now create an agent that will use the model we just created. 15 | agent = con.agents.create(name=f'mindsdb_sql_agent_{model_name}_{uuid4().hex}', 16 | model=model_name) 17 | 18 | # Set up a Postgres data source with our new agent. 
19 | data_source = 'postgres' 20 | connection_args = { 21 | "user": "demo_user", 22 | "password": "demo_password", 23 | "host": "samples.mindsdb.com", 24 | "port": "5432", 25 | "database": "demo", 26 | "schema": "demo_data" 27 | } 28 | description = 'mindsdb demo database' 29 | database = con.databases.create( 30 | f'mindsdb_sql_agent_datasource_{uuid4().hex}', 31 | data_source, 32 | connection_args 33 | ) 34 | 35 | # Actually connect the agent to the datasource. 36 | agent.add_database(database.name, [], description) 37 | 38 | question = 'How many three-bedroom houses were sold in 2008?' 39 | 40 | completion_stream = agent.completion_stream([{'question': question, 'answer': None}]) 41 | 42 | #default logging level is set to INFO, we can change it to DEBUG to see more detailed logs and get full agent steps 43 | mdb_parser = MindsDBSQLStreamParser() 44 | full_response, sql_query = mdb_parser.process_stream(completion_stream) 45 | 46 | con.databases.drop(database.name) 47 | con.agents.drop(agent.name) 48 | -------------------------------------------------------------------------------- /examples/using_database_mind_text2sql.py: -------------------------------------------------------------------------------- 1 | from uuid import uuid4 2 | 3 | from openai import OpenAI 4 | from mindsdb_sdk.utils.mind import create_mind, DatabaseConfig 5 | import os 6 | 7 | 8 | # Load MindsDB API key from environment variable. or set it here. 9 | MINDSDB_API_KEY = os.getenv('MINDSDB_API_KEY') 10 | 11 | # Set the base URL for the MindsDB LiteLLM proxy. 12 | base_url = 'https://llm.mdb.ai' 13 | 14 | 15 | # Connect to MindsDB LiteLLM proxy. 16 | client = OpenAI( 17 | api_key=MINDSDB_API_KEY, 18 | base_url=base_url 19 | ) 20 | 21 | # Create a Database Config. 
22 | pg_config = DatabaseConfig( 23 | description='House Sales', 24 | type='postgres', 25 | connection_args={ 26 | 'user': 'demo_user', 27 | 'password': 'demo_password', 28 | 'host': 'samples.mindsdb.com', 29 | 'port': '5432', 30 | 'database': 'demo', 31 | 'schema': 'demo_data' 32 | }, 33 | tables=['house_sales'] 34 | ) 35 | 36 | # create a database mind 37 | mind = create_mind( 38 | base_url= base_url, 39 | api_key= MINDSDB_API_KEY, 40 | name = f'my_house_data_mind_{uuid4().hex}', 41 | data_source_configs=[pg_config], 42 | ) 43 | 44 | # Actually pass in our tool to get a SQL completion. 45 | completion = client.chat.completions.create( 46 | model=mind.name, 47 | messages=[ 48 | {'role': 'user', 'content': 'How many 2 bedroom houses sold in 2008?'} 49 | ], 50 | stream=False 51 | ) 52 | 53 | print(completion.choices[0].message.content) 54 | -------------------------------------------------------------------------------- /examples/using_openai.py: -------------------------------------------------------------------------------- 1 | 2 | import mindsdb_sdk 3 | 4 | con = mindsdb_sdk.connect() 5 | 6 | openai_handler = con.ml_handlers.openai 7 | 8 | # create ml engine 9 | openai = con.ml_engines.create( 10 | 'openai', 11 | handler=openai_handler, 12 | # handler='openai', # <-- another option to define handler 13 | connection_data={'api_key': ''} 14 | ) 15 | 16 | # create model 17 | model = con.models.create( 18 | 'open1', 19 | predict='answer', 20 | engine=openai, # created ml engine 21 | prompt_template='answer question: {{q}}' 22 | ) 23 | 24 | # use model 25 | model.predict({'q': 'size of the sun'}) -------------------------------------------------------------------------------- /examples/working_with_tables.py: -------------------------------------------------------------------------------- 1 | import mindsdb_sdk 2 | import pandas as pd 3 | 4 | con = mindsdb_sdk.connect() 5 | 6 | # connect to mindsdb example database 7 | example_db = con.databases.create( 8 | 
'example_db', 9 | engine='postgres', 10 | connection_args={ 11 | "user": "demo_user", 12 | "password": "demo_password", 13 | "host": "3.220.66.106", 14 | "port": "5432", 15 | "database": "demo" 16 | } 17 | ) 18 | 19 | # connect to the empty user database 20 | my_db = con.databases.create( 21 | 'my_db', 22 | engine='postgres', 23 | connection_args={ 24 | "user": "postgres", 25 | "host": "localhost", 26 | "port": "5432", 27 | "database": "my_database" 28 | } 29 | ) 30 | 31 | # get home_rentals table 32 | table1 = example_db.tables.get('demo_data.home_rentals') 33 | 34 | # ---- create new table ---- 35 | 36 | # create table home_rentals in user db and fill it with rows with location=great 37 | table2 = my_db.tables.create('home_rentals', table1.filter(location='great')) 38 | 39 | 40 | # create table from csv file 41 | 42 | df = pd.read_csv('my_data.csv') 43 | table3 = my_db.tables.create('my_table', df) 44 | 45 | 46 | # ---- insert into table ---- 47 | 48 | # insert to table2 first 10 rows from table1 49 | table2.insert(table1.limit(10)) 50 | 51 | 52 | # ---- update data in table ---- 53 | 54 | # get all rows with number_of_rooms=1 from table1 and update values in table2 using key ('location', 'neighborhood') 55 | table2.update( 56 | table1.filter(number_of_rooms=1), 57 | on=['location', 'neighborhood'] 58 | ) 59 | 60 | 61 | # ---- delete rows from table ---- 62 | 63 | # delete all rows where bedrooms=2 64 | table2.delete(number_of_rooms=1) 65 | 66 | 67 | -------------------------------------------------------------------------------- /mindsdb_sdk/__about__.py: -------------------------------------------------------------------------------- 1 | __title__ = 'mindsdb_sdk' 2 | __package_name__ = 'mindsdb_sdk' 3 | __version__ = '3.4.3' 4 | __description__ = "MindsDB Python SDK, provides an SDK to use a remote mindsdb instance" 5 | __email__ = "jorge@mindsdb.com" 6 | __author__ = 'MindsDB Inc' 7 | __github__ = 'https://github.com/mindsdb/mindsdb_python_sdk' 8 | __pypi__ = 
'https://pypi.org/project/mindsdb-sdk/' 9 | __license__ = 'MIT' 10 | __copyright__ = 'Copyright 2020- mindsdb' 11 | -------------------------------------------------------------------------------- /mindsdb_sdk/__init__.py: -------------------------------------------------------------------------------- 1 | from mindsdb_sdk.connect import connect 2 | -------------------------------------------------------------------------------- /mindsdb_sdk/agents.py: -------------------------------------------------------------------------------- 1 | from requests.exceptions import HTTPError 2 | from typing import Iterable, List, Union 3 | from urllib.parse import urlparse 4 | from uuid import uuid4 5 | import datetime 6 | import json 7 | 8 | from mindsdb_sdk.knowledge_bases import KnowledgeBase 9 | from mindsdb_sdk.models import Model 10 | from mindsdb_sdk.skills import Skill 11 | from mindsdb_sdk.utils.objects_collection import CollectionBase 12 | 13 | _DEFAULT_LLM_MODEL = 'gpt-4o' 14 | _DEFAULT_LLM_PROMPT = 'Answer the user"s question in a helpful way: {{question}}' 15 | 16 | 17 | class AgentCompletion: 18 | """ 19 | Represents a full MindsDB agent completion response. 20 | 21 | Attributes: 22 | content: The completion content. 23 | context: Only relevant for retrieval agents. Contains the context retrieved from the knowledge base. 24 | 25 | 26 | """ 27 | 28 | def __init__(self, content: str, context: List[dict] = None): 29 | self.content = content 30 | self.context = context 31 | 32 | def __repr__(self): 33 | return f'{self.__class__.__name__}(content: {self.content}, context: {self.context})' 34 | 35 | 36 | class Agent: 37 | """Represents a MindsDB agent. 
38 | 39 | Working with agents: 40 | 41 | Get an agent by name: 42 | 43 | >>> agent = agents.get('my_agent') 44 | 45 | Query an agent: 46 | 47 | >>> completion = agent.completion([{'question': 'What is your name?', 'answer': None}]) 48 | >>> print(completion.content) 49 | 50 | Query an agent with streaming: 51 | 52 | >>> completion = agent.completion_stream([{'question': 'What is your name?', 'answer': None}]) 53 | >>> for chunk in completion: 54 | print(chunk.choices[0].delta.content) 55 | 56 | List all agents: 57 | 58 | >>> agents = agents.list() 59 | 60 | Create a new agent: 61 | 62 | >>> model = models.get('my_model') # Or use models.create(...) 63 | >>> # Connect your agent to a MindsDB table. 64 | >>> text_to_sql_skill = skills.create('text_to_sql', 'sql', { 'tables': ['my_table'], 'database': 'my_database' }) 65 | >>> agent = agents.create('my_agent', model, [text_to_sql_skill]) 66 | 67 | Update an agent: 68 | 69 | >>> new_model = models.get('new_model') 70 | >>> agent.model_name = new_model.name 71 | >>> new_skill = skills.create('new_skill', 'sql', { 'tables': ['new_table'], 'database': 'new_database' }) 72 | >>> updated_agent.skills.append(new_skill) 73 | >>> updated_agent = agents.update('my_agent', agent) 74 | 75 | Delete an agent by name: 76 | 77 | >>> agents.drop('my_agent') 78 | """ 79 | 80 | def __init__( 81 | self, 82 | name: str, 83 | model_name: str, 84 | skills: List[Skill], 85 | params: dict, 86 | created_at: datetime.datetime, 87 | updated_at: datetime.datetime, 88 | provider: str = None, 89 | collection: CollectionBase = None 90 | ): 91 | self.name = name 92 | self.model_name = model_name 93 | self.provider = provider 94 | self.skills = skills 95 | self.params = params 96 | self.created_at = created_at 97 | self.updated_at = updated_at 98 | self.collection = collection 99 | 100 | def completion(self, messages: List[dict]) -> AgentCompletion: 101 | return self.collection.completion(self.name, messages) 102 | 103 | def completion_v2(self, 
messages: List[dict]) -> AgentCompletion: 104 | return self.collection.completion_v2(self.name, messages) 105 | 106 | def completion_stream(self, messages: List[dict]) -> Iterable[object]: 107 | return self.collection.completion_stream(self.name, messages) 108 | 109 | def completion_stream_v2(self, messages: List[dict]) -> Iterable[object]: 110 | return self.collection.completion_stream_v2(self.name, messages) 111 | 112 | def add_files(self, file_paths: List[str], description: str, knowledge_base: str = None): 113 | """ 114 | Add a list of files to the agent for retrieval. 115 | 116 | :param file_paths: List of paths to the files to be added. 117 | """ 118 | self.collection.add_files(self.name, file_paths, description, knowledge_base) 119 | 120 | def add_file(self, file_path: str, description: str, knowledge_base: str = None): 121 | """ 122 | Add a file to the agent for retrieval. 123 | 124 | :param file_path: Path to the file to be added. 125 | """ 126 | self.collection.add_file(self.name, file_path, description, knowledge_base) 127 | 128 | def add_webpages( 129 | self, 130 | urls: List[str], 131 | description: str, 132 | knowledge_base: str = None, 133 | crawl_depth: int = 1, 134 | limit: int = None, 135 | filters: List[str] = None): 136 | """ 137 | Add a crawled URL to the agent for retrieval. 138 | 139 | :param urls: URLs of pages to be crawled and added. 140 | :param description: Description of the webpages. Used by agent to know when to do retrieval. 141 | :param knowledge_base: Name of an existing knowledge base to be used. Will create a default knowledge base if not given. 142 | :param crawl_depth: How deep to crawl from each base URL. 
0 = scrape given URLs only, -1 = default max 143 | :param limit: max count of pages to crawl 144 | :param filters: Include only URLs that match these regex patterns 145 | """ 146 | self.collection.add_webpages(self.name, urls, description, knowledge_base=knowledge_base, 147 | crawl_depth=crawl_depth, limit=limit, filters=filters) 148 | 149 | def add_webpage( 150 | self, 151 | url: str, 152 | description: str, 153 | knowledge_base: str = None, 154 | crawl_depth: int = 1, 155 | limit: int = None, 156 | filters: List[str] = None): 157 | """ 158 | Add a crawled URL to the agent for retrieval. 159 | 160 | :param url: URL of the page to be crawled and added. 161 | :param description: Description of the webpages. Used by agent to know when to do retrieval. 162 | :param knowledge_base: Name of an existing knowledge base to be used. Will create a default knowledge base if not given. 163 | :param crawl_depth: How deep to crawl from each base URL. 0 = scrape given URLs only, -1 = default max 164 | :param limit: max count of pages to crawl 165 | :param filters: Include only URLs that match these regex patterns 166 | """ 167 | self.collection.add_webpage(self.name, url, description, knowledge_base=knowledge_base, 168 | crawl_depth=crawl_depth, limit=limit, filters=filters) 169 | 170 | def add_database(self, database: str, tables: List[str], description: str): 171 | """ 172 | Add a database to the agent for retrieval. 173 | 174 | :param database: Name of the database to be added. 175 | :param tables: List of tables to be added. 176 | :param description: Description of the database tables. Used by the agent to know when to use SQL skill. 
177 | """ 178 | self.collection.add_database(self.name, database, tables, description) 179 | 180 | def __repr__(self): 181 | return f'{self.__class__.__name__}(name: {self.name})' 182 | 183 | def __eq__(self, other): 184 | if self.name != other.name: 185 | return False 186 | if self.model_name != other.model_name: 187 | return False 188 | if self.provider != other.provider: 189 | return False 190 | if self.skills != other.skills: 191 | return False 192 | if self.params != other.params: 193 | return False 194 | if self.created_at != other.created_at: 195 | return False 196 | return self.updated_at == other.updated_at 197 | 198 | @classmethod 199 | def from_json(cls, json: dict, collection: CollectionBase): 200 | return cls( 201 | json['name'], 202 | json['model_name'], 203 | [Skill.from_json(skill) for skill in json['skills']], 204 | json['params'], 205 | json['created_at'], 206 | json['updated_at'], 207 | json['provider'], 208 | collection 209 | ) 210 | 211 | 212 | class Agents(CollectionBase): 213 | """Collection for agents""" 214 | 215 | def __init__(self, project, api): 216 | self.api = api 217 | self.project = project 218 | 219 | self.knowledge_bases = project.knowledge_bases 220 | self.models = project.models 221 | self.skills = project.skills 222 | 223 | self.databases = project.server.databases 224 | self.ml_engines = project.server.ml_engines 225 | 226 | def list(self) -> List[Agent]: 227 | """ 228 | List available agents. 229 | 230 | :return: list of agents 231 | """ 232 | data = self.api.agents(self.project.name) 233 | return [Agent.from_json(agent, self) for agent in data] 234 | 235 | def get(self, name: str) -> Agent: 236 | """ 237 | Gets an agent by name. 
238 | 239 | :param name: Name of the agent 240 | 241 | :return: agent with given name 242 | """ 243 | data = self.api.agent(self.project.name, name) 244 | return Agent.from_json(data, self) 245 | 246 | def completion(self, name: str, messages: List[dict]) -> AgentCompletion: 247 | """ 248 | Queries the agent for a completion. 249 | 250 | :param name: Name of the agent 251 | :param messages: List of messages to be sent to the agent 252 | 253 | :return: completion from querying the agent 254 | """ 255 | data = self.api.agent_completion(self.project.name, name, messages) 256 | if 'context' in data['message']: 257 | return AgentCompletion(data['message']['content'], data['message'].get('context')) 258 | 259 | return AgentCompletion(data['message']['content']) 260 | 261 | def completion_v2(self, name: str, messages: List[dict]) -> AgentCompletion: 262 | """ 263 | Queries the agent for a completion. 264 | 265 | :param name: Name of the agent 266 | :param messages: List of messages to be sent to the agent 267 | 268 | :return: completion from querying the agent 269 | """ 270 | return self.api.agent_completion(self.project.name, name, messages) 271 | 272 | def completion_stream(self, name, messages: List[dict]) -> Iterable[object]: 273 | """ 274 | Queries the agent for a completion and streams the response as an iterable object. 275 | 276 | :param name: Name of the agent 277 | :param messageS: List of messages to be sent to the agent 278 | 279 | :return: iterable of completion chunks from querying the agent. 280 | """ 281 | return self.api.agent_completion_stream(self.project.name, name, messages) 282 | 283 | def completion_stream_v2(self, name, messages: List[dict]) -> Iterable[object]: 284 | """ 285 | Queries the agent for a completion and streams the response as an iterable object. 286 | 287 | :param name: Name of the agent 288 | :param messages: List of messages to be sent to the agent 289 | 290 | :return: iterable of completion chunks from querying the agent. 
291 | """ 292 | return self.api.agent_completion_stream_v2(self.project.name, name, messages) 293 | 294 | def _create_default_knowledge_base(self, agent: Agent, name: str) -> KnowledgeBase: 295 | # Make sure default ML engine for embeddings exists. 296 | try: 297 | _ = self.ml_engines.get('langchain_embedding') 298 | except AttributeError: 299 | _ = self.ml_engines.create('langchain_embedding', 'langchain_embedding') 300 | # Include API keys in embeddings. 301 | if agent.provider == "mindsdb": 302 | agent_model = self.models.get(agent.model_name) 303 | training_options = json.loads(agent_model.data.get('training_options', '{}')) 304 | training_options_using = training_options.get('using', {}) 305 | api_key_params = {k: v for k, v in training_options_using.items() if 'api_key' in k} 306 | kb = self.knowledge_bases.create(name, params=api_key_params) 307 | else: 308 | kb = self.knowledge_bases.create(name) 309 | # Wait for underlying embedding model to finish training. 310 | kb.model.wait_complete() 311 | return kb 312 | 313 | def add_files(self, name: str, file_paths: List[str], description: str, knowledge_base: str = None): 314 | """ 315 | Add a list of files to the agent for retrieval. 316 | 317 | :param name: Name of the agent 318 | :param file_paths: List of paths or URLs to the files to be added. 319 | :param description: Description of the file. Used by agent to know when to do retrieval 320 | :param knowledge_base: Name of an existing knowledge base to be used. Will create a default knowledge base if not given. 
321 | """ 322 | if not file_paths: 323 | return 324 | filename_no_extension = '' 325 | all_filenames = [] 326 | for file_path in file_paths: 327 | filename = file_path.split('/')[-1].lower() 328 | filename_no_extension = filename.split('.')[0] 329 | all_filenames.append(filename_no_extension) 330 | try: 331 | _ = self.api.get_file_metadata(filename_no_extension) 332 | except HTTPError as e: 333 | if e.response.status_code >= 400 and e.response.status_code != 404: 334 | raise e 335 | # upload file to mindsdb 336 | self.api.upload_file(filename, file_path) 337 | 338 | # Insert uploaded files into new knowledge base. 339 | agent = self.get(name) 340 | if knowledge_base is not None: 341 | kb = self.knowledge_bases.get(knowledge_base) 342 | else: 343 | kb_name = f'{name.lower()}_{filename_no_extension}_{uuid4().hex}_kb' 344 | kb = self._create_default_knowledge_base(agent, kb_name) 345 | 346 | # Insert the entire file. 347 | kb.insert_files(all_filenames) 348 | 349 | # Make sure skill name is unique. 350 | skill_name = f'{filename_no_extension}_retrieval_skill_{uuid4().hex}' 351 | retrieval_params = { 352 | 'source': kb.name, 353 | 'description': description, 354 | } 355 | file_retrieval_skill = self.skills.create(skill_name, 'retrieval', retrieval_params) 356 | agent.skills.append(file_retrieval_skill) 357 | self.update(agent.name, agent) 358 | 359 | def add_file(self, name: str, file_path: str, description: str, knowledge_base: str = None): 360 | """ 361 | Add a file to the agent for retrieval. 362 | 363 | :param name: Name of the agent 364 | :param file_path: Path to the file to be added, or name of existing file. 365 | :param description: Description of the file. Used by agent to know when to do retrieval 366 | :param knowledge_base: Name of an existing knowledge base to be used. Will create a default knowledge base if not given. 
367 | """ 368 | self.add_files(name, [file_path], description, knowledge_base) 369 | 370 | def add_webpages( 371 | self, 372 | name: str, 373 | urls: List[str], 374 | description: str, 375 | knowledge_base: str = None, 376 | crawl_depth: int = 1, 377 | limit: int = None, 378 | filters: List[str] = None 379 | ): 380 | """ 381 | Add a list of webpages to the agent for retrieval. 382 | 383 | :param name: Name of the agent 384 | :param urls: List of URLs of the webpages to be added. 385 | :param description: Description of the webpages. Used by agent to know when to do retrieval. 386 | :param knowledge_base: Name of an existing knowledge base to be used. Will create a default knowledge base if not given. 387 | :param crawl_depth: How deep to crawl from each base URL. 0 = scrape given URLs only 388 | :param limit: max count of pages to crawl 389 | :param filters: Include only URLs that match these regex patterns 390 | """ 391 | if not urls: 392 | return 393 | agent = self.get(name) 394 | for url in urls: 395 | # Validate URLs. 396 | _ = urlparse(url) 397 | if knowledge_base is not None: 398 | kb = self.knowledge_bases.get(knowledge_base) 399 | else: 400 | kb_name = f'{name.lower()}_web_{uuid4().hex}_kb' 401 | kb = self._create_default_knowledge_base(agent, kb_name) 402 | 403 | # Insert crawled webpage. 404 | kb.insert_webpages(urls, crawl_depth=crawl_depth, filters=filters, limit=limit) 405 | 406 | # Make sure skill name is unique. 
407 | skill_name = f'web_retrieval_skill_{uuid4().hex}' 408 | retrieval_params = { 409 | 'source': kb.name, 410 | 'description': description, 411 | } 412 | webpage_retrieval_skill = self.skills.create(skill_name, 'retrieval', retrieval_params) 413 | agent.skills.append(webpage_retrieval_skill) 414 | self.update(agent.name, agent) 415 | 416 | def add_webpage( 417 | self, 418 | name: str, 419 | url: str, 420 | description: str, 421 | knowledge_base: str = None, 422 | crawl_depth: int = 1, 423 | limit: int = None, 424 | filters: List[str] = None): 425 | """ 426 | Add a webpage to the agent for retrieval. 427 | 428 | :param name: Name of the agent 429 | :param file_path: URL of the webpage to be added, or name of existing webpage. 430 | :param description: Description of the webpage. Used by agent to know when to do retrieval. 431 | :param knowledge_base: Name of an existing knowledge base to be used. Will create a default knowledge base if not given. 432 | :param crawl_depth: How deep to crawl from each base URL. 0 = scrape given URLs only 433 | :param limit: max count of pages to crawl 434 | :param filters: Include only URLs that match these regex patterns 435 | """ 436 | self.add_webpages(name, [url], description, knowledge_base=knowledge_base, 437 | crawl_depth=crawl_depth, limit=limit, filters=filters) 438 | 439 | def add_database(self, name: str, database: str, tables: List[str], description: str): 440 | """ 441 | Add a database to the agent for retrieval. 442 | 443 | :param name: Name of the agent 444 | :param database: Name of the database to be added. 445 | :param tables: List of tables to be added. 446 | :param description: Description of the database. Used by agent to know when to do retrieval. 447 | """ 448 | # Make sure database exists. 449 | db = self.databases.get(database) 450 | # Make sure tables exist. 
451 | all_table_names = set([t.name for t in db.tables.list()]) 452 | for t in tables: 453 | if t not in all_table_names: 454 | raise ValueError(f'Table {t} does not exist in database {database}.') 455 | 456 | # Make sure skill name is unique. 457 | skill_name = f'{database}_sql_skill_{uuid4().hex}' 458 | sql_params = { 459 | 'database': database, 460 | 'tables': tables, 461 | 'description': description, 462 | } 463 | database_sql_skill = self.skills.create(skill_name, 'sql', sql_params) 464 | agent = self.get(name) 465 | 466 | if not agent.params: 467 | agent.params = {} 468 | if 'prompt_template' not in agent.params: 469 | # Set default prompt template. This is for langchain agent check. 470 | agent.params['prompt_template'] = 'using mindsdb sqltoolbox' 471 | 472 | agent.skills.append(database_sql_skill) 473 | self.update(agent.name, agent) 474 | 475 | def create( 476 | self, 477 | name: str, 478 | model: Union[Model, dict, str] = None, 479 | provider: str = None, 480 | skills: List[Union[Skill, str]] = None, 481 | params: dict = None, 482 | **kwargs) -> Agent: 483 | """ 484 | Create new agent and return it 485 | 486 | :param name: Name of the agent to be created 487 | :param model: Model to be used by the agent 488 | :param skills: List of skills to be used by the agent. Currently only 'sql' is supported. 489 | :param params: Parameters for the agent 490 | 491 | :return: created agent object 492 | """ 493 | skills = skills or [] 494 | skill_names = [] 495 | for skill in skills: 496 | if isinstance(skill, str): 497 | # Check if skill exists. 498 | # TODO what this line does? 499 | _ = self.skills.get(skill) 500 | skill_names.append(skill) 501 | continue 502 | # Create the skill if it doesn't exist. 
503 | _ = self.skills.create(skill.name, skill.type, skill.params) 504 | skill_names.append(skill.name) 505 | 506 | if params is None: 507 | params = {} 508 | params.update(kwargs) 509 | 510 | if 'prompt_template' not in params: 511 | params['prompt_template'] = _DEFAULT_LLM_PROMPT 512 | 513 | if model is None: 514 | model = _DEFAULT_LLM_MODEL 515 | elif isinstance(model, Model): 516 | model = model.name 517 | provider = 'mindsdb' 518 | 519 | data = self.api.create_agent(self.project.name, name, model, provider, skill_names, params) 520 | return Agent.from_json(data, self) 521 | 522 | def update(self, name: str, updated_agent: Agent): 523 | """ 524 | Update an agent by name. 525 | 526 | :param name: Name of the agent to be updated 527 | :param updated_agent: Agent with updated fields 528 | 529 | :return: updated agent object 530 | """ 531 | updated_skills = set() 532 | for skill in updated_agent.skills: 533 | if isinstance(skill, str): 534 | # Skill must exist. 535 | _ = self.skills.get(skill) 536 | updated_skills.add(skill) 537 | continue 538 | try: 539 | # Create the skill if it doesn't exist. 
540 | _ = self.skills.get(skill.name) 541 | except HTTPError as e: 542 | if e.response.status_code != 404: 543 | raise e 544 | # Doesn't exist 545 | _ = self.skills.create(skill.name, skill.type, skill.params) 546 | updated_skills.add(skill.name) 547 | 548 | existing_agent = self.api.agent(self.project.name, name) 549 | existing_skills = set([s['name'] for s in existing_agent['skills']]) 550 | skills_to_add = updated_skills.difference(existing_skills) 551 | skills_to_remove = existing_skills.difference(updated_skills) 552 | data = self.api.update_agent( 553 | self.project.name, 554 | name, 555 | updated_agent.name, 556 | updated_agent.provider, 557 | updated_agent.model_name, 558 | list(skills_to_add), 559 | list(skills_to_remove), 560 | updated_agent.params 561 | ) 562 | return Agent.from_json(data, self) 563 | 564 | def drop(self, name: str): 565 | """ 566 | Drop an agent by name. 567 | 568 | :param name: Name of the agent to be dropped 569 | """ 570 | _ = self.api.delete_agent(self.project.name, name) 571 | -------------------------------------------------------------------------------- /mindsdb_sdk/connect.py: -------------------------------------------------------------------------------- 1 | from mindsdb_sdk.server import Server 2 | 3 | from mindsdb_sdk.connectors.rest_api import RestAPI 4 | 5 | DEFAULT_LOCAL_API_URL = 'http://127.0.0.1:47334' 6 | DEFAULT_CLOUD_API_URL = 'https://cloud.mindsdb.com' 7 | 8 | 9 | def connect( 10 | url: str = None, 11 | login: str = None, 12 | password: str = None, 13 | api_key: str = None, 14 | is_managed: bool = False, 15 | cookies=None, 16 | headers=None) -> Server: 17 | """ 18 | Create connection to mindsdb server 19 | 20 | :param url: url to mindsdb server 21 | :param login: user login, for cloud version it contents email 22 | :param password: user password to login (for cloud version) 23 | :param api_key: API key to authenticate (for cloud version) 24 | :param is_managed: whether or not the URL points to a managed instance 
def connect(
        url: str = None,
        login: str = None,
        password: str = None,
        api_key: str = None,
        is_managed: bool = False,
        cookies=None,
        headers=None) -> Server:
    """
    Create connection to mindsdb server

    :param url: url to mindsdb server
    :param login: user login, for cloud version it contains email
    :param password: user password to login (for cloud version)
    :param api_key: API key to authenticate (for cloud version)
    :param is_managed: whether or not the URL points to a managed instance
    :param cookies: additional cookies to send with the connection, optional
    :param headers: additional headers to send with the connection, optional
    :return: Server object

    Examples
    --------

    >>> import mindsdb_sdk

    Connect to local server

    >>> con = mindsdb_sdk.connect()
    >>> con = mindsdb_sdk.connect('http://127.0.0.1:47334')

    Connect to cloud server

    >>> con = mindsdb_sdk.connect('https://cloud.mindsdb.com', api_key='-')

    Connect to MindsDB pro

    >>> con = mindsdb_sdk.connect('http://', login='a@b.com', password='-', is_managed=True)

    """
    # Fix: corrected 'addtional' -> 'additional' and 'contents' -> 'contains' in the docstring.
    if url is None:
        if login is not None:
            # Credentials given but no url: default to the cloud server.
            url = DEFAULT_CLOUD_API_URL
        else:
            # No credentials: assume a local installation.
            url = DEFAULT_LOCAL_API_URL

    api = RestAPI(url, login, password, api_key, is_managed,
                  cookies=cookies, headers=headers)

    return Server(api)
def _try_relogin(fnc):
    """Decorator: on a 401 response, re-login once and retry the wrapped call.

    Any other HTTP error (or a failed re-login) re-raises the original error.
    """
    @wraps(fnc)
    def wrapper(self, *args, **kwargs):
        try:
            return fnc(self, *args, **kwargs)
        except requests.HTTPError as e:
            if e.response.status_code != 401:
                raise e

            # Session likely expired: try to re-login before retrying.
            try:
                self.login()
            except requests.HTTPError:
                raise e
            # call once more
            return fnc(self, *args, **kwargs)
    return wrapper


def _raise_for_status(response):
    """Raise HTTPError for 4xx/5xx responses, including the response text in the message."""
    if 400 <= response.status_code < 600:
        raise requests.HTTPError(f'{response.reason}: {response.text}', response=response)


class RestAPI:
    """Thin REST client for the MindsDB HTTP API, holding one requests session."""

    def __init__(self, url=None, login=None, password=None, api_key=None, is_managed=False,
                 cookies=None, headers=None):
        """
        :param url: base url of the mindsdb server
        :param login: user login (email for cloud)
        :param password: user password
        :param api_key: API key; when given it is used instead of login/password
        :param is_managed: whether the url points to a managed instance
        :param cookies: additional cookies sent with every request, optional
        :param headers: additional headers sent with every request, optional
        """
        self.url = url
        self.username = login
        self.password = password
        self.api_key = api_key
        self.is_managed = is_managed
        self.session = requests.Session()

        if cookies is not None:
            self.session.cookies.update(cookies)

        self.session.headers['User-Agent'] = f'python-sdk/{__about__.__version__}'
        if headers is not None:
            self.session.headers.update(headers)
        if self.api_key is not None:
            # Authenticate with API key instead of logging in, if present.
            self.session.headers['X-Api-Key'] = self.api_key
            return
        if login is not None:
            self.login()

    def login(self):
        """Authenticate the session using login/password.

        Uses the cloud endpoint unless is_managed is set, falling back to the
        managed-instance endpoint when the cloud one is absent (404/405).
        """
        managed_endpoint = '/api/login'
        cloud_endpoint = '/cloud/login'

        # Fix: local variable renamed from `json` to `payload` to stop
        # shadowing the imported `json` module.
        if self.is_managed:
            payload = {'password': self.password, 'username': self.username}
            url = self.url + managed_endpoint
        else:
            payload = {'password': self.password, 'email': self.username}
            url = self.url + cloud_endpoint
        r = self.session.post(url, json=payload)

        # fallback when using a managed instance with is_managed=False
        if r.status_code in (405, 404) and self.is_managed is False:
            # try managed instance login
            payload = {'password': self.password, 'username': self.username}
            url = self.url + managed_endpoint
            r = self.session.post(url, json=payload)

        _raise_for_status(r)

    @_try_relogin
    def sql_query(self, sql, database=None, lowercase_columns=False):
        """Execute an SQL query and return a DataFrame, or None for non-table results.

        :param sql: query text
        :param database: default database context; 'mindsdb' when not set
        :param lowercase_columns: lowercase the returned column names
        :raises RuntimeError: if the server reports a query error
        """
        if database is None:
            # it means the database is included in query
            database = 'mindsdb'
        url = self.url + '/api/sql/query'
        r = self.session.post(url, json={
            'query': sql,
            'context': {'db': database}
        })
        _raise_for_status(r)

        data = r.json()
        if data['type'] == 'table':
            columns = data['column_names']
            if lowercase_columns:
                columns = [i.lower() for i in columns]
            return pd.DataFrame(data['data'], columns=columns)
        if data['type'] == 'error':
            raise RuntimeError(data['error_message'])
        return None

    @_try_relogin
    def projects(self):
        """Return available projects as a DataFrame."""
        # TODO not used yet

        r = self.session.get(self.url + '/api/projects')
        _raise_for_status(r)

        return pd.DataFrame(r.json())
None: 124 | params = {} 125 | url = self.url + f'/api/projects/{project}/models/{model}/predict' 126 | r = self.session.post(url, json={ 127 | 'data': data, 128 | 'params': params 129 | }) 130 | _raise_for_status(r) 131 | 132 | return pd.DataFrame(r.json()) 133 | 134 | @_try_relogin 135 | def objects_tree(self, item=''): 136 | r = self.session.get(self.url + f'/api/tree/{item}') 137 | _raise_for_status(r) 138 | 139 | return pd.DataFrame(r.json()) 140 | 141 | @staticmethod 142 | def read_file_as_bytes(file_path: str): 143 | """ 144 | Read and return content of a file in bytes, given its path. 145 | :param file_path: Path of the file to read. 146 | :return: File content in bytes. 147 | """ 148 | try: 149 | with open(file_path, 'rb+') as file: 150 | return file.read() 151 | except FileNotFoundError: 152 | raise Exception(f'File {file_path} does not exist.') 153 | except PermissionError: 154 | raise Exception(f'Permission denied when reading file {file_path}.') 155 | except Exception as e: 156 | raise Exception(f'Unknown error occurred when reading file {file_path} - {str(e)}') 157 | 158 | @staticmethod 159 | def read_dataframe_as_csv(data: pd.DataFrame): 160 | """ 161 | Read and return content of a DataFrame as CSV in bytes. 162 | :param data: DataFrame to read. 163 | :return: DataFrame content as CSV in bytes. 164 | """ 165 | fd = io.BytesIO() 166 | data.to_csv(fd, index=False) 167 | fd.seek(0) 168 | return fd.read() 169 | 170 | @staticmethod 171 | def read_file_as_webpage(url: str): 172 | """ 173 | Read and return content of a file in bytes, given its URL. 174 | :param file_path: URL of the file to read. 175 | :return: File content in bytes. 176 | """ 177 | data = requests.get(url) 178 | return data.content 179 | 180 | def upload_data(self, file_name: str, data: bytes): 181 | """ 182 | Upload binary data to MindsDB. 183 | :param file_name: Name of the file. 184 | :param data: Binary data to upload. 
185 | """ 186 | # remove suffix from file if present 187 | name = file_name.split('.')[0] 188 | 189 | url = self.url + f'/api/files/{name}' 190 | r = self.session.put( 191 | url, 192 | data={ 193 | 'original_file_name':file_name, 194 | 'name':name, 195 | 'source_type':'file', 196 | }, 197 | files={ 198 | 'file': (file_name, data) 199 | 200 | } 201 | ) 202 | _raise_for_status(r) 203 | 204 | @_try_relogin 205 | def upload_file(self, name: str, data: Union[pd.DataFrame, str]): 206 | """ 207 | Upload a file or a DataFrame to MindsDB. 208 | :param name: Name of the file or DataFrame. 209 | :param data: DataFrame data or file path. 210 | """ 211 | if isinstance(data, pd.DataFrame): 212 | data_in_bytes = self.read_dataframe_as_csv(data) 213 | elif validators.url(data): 214 | data_in_bytes = self.read_file_as_webpage(data) 215 | else: 216 | data_in_bytes = self.read_file_as_bytes(data) 217 | 218 | self.upload_data(name, data_in_bytes) 219 | 220 | @_try_relogin 221 | def get_file_metadata(self, name: str) -> dict: 222 | # No endpoint currently to get single file. 223 | url = self.url + f'/api/files/' 224 | r = self.session.get(url) 225 | _raise_for_status(r) 226 | all_file_metadata = r.json() 227 | for metadata in all_file_metadata: 228 | if metadata.get('name', None) == name: 229 | return metadata 230 | r.status_code = 404 231 | raise requests.HTTPError(f'Not found: No file named {name} found', response=r) 232 | 233 | @_try_relogin 234 | def upload_byom(self, name: str, code: str, requirements: str): 235 | 236 | url = self.url + f'/api/handlers/byom/{name}' 237 | r = self.session.put( 238 | url, 239 | files={ 240 | 'code': code, 241 | 'modules': requirements, 242 | } 243 | ) 244 | _raise_for_status(r) 245 | 246 | def status(self) -> dict: 247 | 248 | r = self.session.get(self.url + '/api/status') 249 | _raise_for_status(r) 250 | 251 | return r.json() 252 | 253 | # TODO: Different endpoints should be refactored into their own classes. 254 | # 255 | # Agents operations. 
256 | @_try_relogin 257 | def agents(self, project: str): 258 | r = self.session.get(self.url + f'/api/projects/{project}/agents') 259 | _raise_for_status(r) 260 | 261 | return r.json() 262 | 263 | @_try_relogin 264 | def agent(self, project: str, name: str): 265 | r = self.session.get(self.url + f'/api/projects/{project}/agents/{name}') 266 | _raise_for_status(r) 267 | 268 | return r.json() 269 | 270 | @_try_relogin 271 | def agent_completion(self, project: str, name: str, messages: List[dict]): 272 | url = self.url + f'/api/projects/{project}/agents/{name}/completions' 273 | r = self.session.post( 274 | url, 275 | json={ 276 | 'messages': messages 277 | } 278 | ) 279 | _raise_for_status(r) 280 | 281 | return r.json() 282 | 283 | @_try_relogin 284 | def agent_completion_stream(self, project: str, name: str, messages: List[dict]): 285 | url = self.url + f'/api/projects/{project}/agents/{name}/completions/stream' 286 | stream = self.session.post(url, json={'messages': messages}, stream=True) 287 | client = SSEClient(stream) 288 | for chunk in client.events(): 289 | # Stream objects loaded from SSE events 'data' param. 
290 | yield json.loads(chunk.data) 291 | 292 | @_try_relogin 293 | def agent_completion_stream_v2(self, project: str, name: str, messages: List[dict]): 294 | url = self.url + f'/api/projects/{project}/agents/{name}/completions/stream' 295 | response = self.session.post(url, json={'messages': messages}, stream=True) 296 | 297 | # Check for HTTP errors before processing the stream 298 | response.raise_for_status() 299 | 300 | client = SSEClient(response) 301 | 302 | try: 303 | for chunk in client.events(): 304 | yield chunk # Stream SSE events 305 | except Exception as e: 306 | yield e 307 | 308 | @_try_relogin 309 | def create_agent(self, project: str, name: str, model: str = None, provider: str = None, skills: List[str] = None, params: dict = None): 310 | url = self.url + f'/api/projects/{project}/agents' 311 | r = self.session.post( 312 | url, 313 | json={ 314 | 'agent': { 315 | 'name': name, 316 | 'model_name': model, 317 | 'provider': provider, 318 | 'skills': skills, 319 | 'params': params 320 | } 321 | } 322 | ) 323 | _raise_for_status(r) 324 | return r.json() 325 | 326 | @_try_relogin 327 | def update_agent( 328 | self, 329 | project: str, 330 | name: str, 331 | updated_name: str, 332 | updated_provider: str, 333 | updated_model: str, 334 | skills_to_add: List[str], 335 | skills_to_remove: List[str], 336 | updated_params: dict 337 | ): 338 | url = self.url + f'/api/projects/{project}/agents/{name}' 339 | r = self.session.put( 340 | url, 341 | json={ 342 | 'agent': { 343 | 'name': updated_name, 344 | 'model_name': updated_model, 345 | 'provider': updated_provider, 346 | 'skills_to_add': skills_to_add, 347 | 'skills_to_remove': skills_to_remove, 348 | 'params': updated_params 349 | } 350 | } 351 | ) 352 | _raise_for_status(r) 353 | return r.json() 354 | 355 | @_try_relogin 356 | def delete_agent(self, project: str, name: str): 357 | url = self.url + f'/api/projects/{project}/agents/{name}' 358 | r = self.session.delete(url) 359 | _raise_for_status(r) 360 | 361 
| # Skills operations. 362 | @_try_relogin 363 | def skills(self, project: str): 364 | r = self.session.get(self.url + f'/api/projects/{project}/skills') 365 | _raise_for_status(r) 366 | 367 | return r.json() 368 | 369 | @_try_relogin 370 | def skill(self, project: str, name: str): 371 | r = self.session.get(self.url + f'/api/projects/{project}/skills/{name}') 372 | _raise_for_status(r) 373 | 374 | return r.json() 375 | 376 | @_try_relogin 377 | def create_skill(self, project: str, name: str, type: str, params: dict): 378 | url = self.url + f'/api/projects/{project}/skills' 379 | r = self.session.post( 380 | url, 381 | json={ 382 | 'skill': { 383 | 'name': name, 384 | 'type': type, 385 | 'params': params 386 | } 387 | } 388 | ) 389 | _raise_for_status(r) 390 | 391 | return r.json() 392 | 393 | @_try_relogin 394 | def update_skill( 395 | self, 396 | project: str, 397 | name: str, 398 | updated_name: str, 399 | updated_type: str, 400 | updated_params: dict 401 | ): 402 | url = self.url + f'/api/projects/{project}/skills/{name}' 403 | r = self.session.put( 404 | url, 405 | json={ 406 | 'skill': { 407 | 'name': updated_name, 408 | 'type': updated_type, 409 | 'params': updated_params 410 | } 411 | } 412 | ) 413 | _raise_for_status(r) 414 | return r.json() 415 | 416 | @_try_relogin 417 | def delete_skill(self, project: str, name: str): 418 | url = self.url + f'/api/projects/{project}/skills/{name}' 419 | r = self.session.delete(url) 420 | _raise_for_status(r) 421 | 422 | # Knowledge Base operations. 
423 | @_try_relogin 424 | def insert_into_knowledge_base(self, project: str, knowledge_base_name: str, data): 425 | r = self.session.put( 426 | self.url + f'/api/projects/{project}/knowledge_bases/{knowledge_base_name}', 427 | json={ 428 | 'knowledge_base': data 429 | } 430 | ) 431 | _raise_for_status(r) 432 | 433 | return r.json() 434 | 435 | @_try_relogin 436 | def list_knowledge_bases(self, project: str): 437 | r = self.session.get(self.url + f'/api/projects/{project}/knowledge_bases') 438 | _raise_for_status(r) 439 | return r.json() 440 | 441 | @_try_relogin 442 | def get_knowledge_base(self, project: str, knowledge_base_name): 443 | r = self.session.get(self.url + f'/api/projects/{project}/knowledge_bases/{knowledge_base_name}') 444 | _raise_for_status(r) 445 | return r.json() 446 | 447 | @_try_relogin 448 | def delete_knowledge_base(self, project: str, knowledge_base_name): 449 | r = self.session.delete(self.url + f'/api/projects/{project}/knowledge_bases/{knowledge_base_name}') 450 | _raise_for_status(r) 451 | 452 | @_try_relogin 453 | def create_knowledge_base(self, project: str, data): 454 | r = self.session.post( 455 | self.url + f'/api/projects/{project}/knowledge_bases', 456 | json={ 457 | 'knowledge_base': data 458 | } 459 | ) 460 | _raise_for_status(r) 461 | 462 | return r.json() 463 | 464 | def knowledge_base_completion(self, project: str, knowledge_base_name, payload): 465 | r = self.session.post( 466 | self.url + f'/api/projects/{project}/knowledge_bases/{knowledge_base_name}/completions', 467 | json=payload 468 | ) 469 | _raise_for_status(r) 470 | return r.json() 471 | -------------------------------------------------------------------------------- /mindsdb_sdk/databases.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Union 2 | 3 | from mindsdb_sql_parser.ast.mindsdb import CreateDatabase 4 | from mindsdb_sql_parser.ast import DropDatabase, Identifier 5 | 6 | from 
class Database:
    """
    Allows to work with database (datasource): to use tables and make raw queries

    To run native query
    At this moment query is just saved in Query object and not executed

    >>> query = database.query('select * from table1') # returns Query

    This command sends request to server to execute query and return dataframe

    >>> df = query.fetch()

    Has list of tables in .tables attribute.

    """

    def __init__(self, server, name: str, engine: str = None, params: Dict = None):
        self.server = server
        self.name = name
        self.engine = engine
        self.api = server.api
        self.params = params

        self.tables = Tables(self, self.api)

        # old api — aliases kept for backward compatibility
        self.get_table = self.tables.get
        self.list_tables = self.tables.list
        self.create_table = self.tables.create
        self.drop_table = self.tables.drop

    def __repr__(self):
        return f'{self.__class__.__name__}({self.name})'

    def query(self, sql: str) -> Query:
        """
        Make raw query to integration

        :param sql: sql of the query
        :return: Query object
        """
        # Fix: removed stale ':param database:' doc entry — this method takes no
        # such parameter; the query always targets this database.
        return Query(self.api, sql, database=self.name)


class Databases(CollectionBase):
    """
    Databases
    ----------

    >>> databases.list()
    >>> db = databases[0] # Database type object

    # create

    >>> db = databases.create('example_db',
    ...                       engine='postgres',
    ...                       connection_args={'host': ''})

    # drop database

    >>> databases.drop('example_db')

    # get existing

    >>> db = databases.get('example_db')

    """

    def __init__(self, api):
        self.api = api

    def _list_databases(self) -> Dict[str, Database]:
        # NOTE(review): `self` (a Databases collection) is passed as the `server`
        # argument of Database; it works because both expose `.api` — confirm
        # this is intentional.
        data = self.api.sql_query(
            "select NAME, ENGINE, CONNECTION_DATA from information_schema.databases where TYPE='data'"
        )
        name_to_db = {}
        for _, row in data.iterrows():
            name_to_db[row["NAME"]] = Database(
                self, row["NAME"], engine=row["ENGINE"], params=row["CONNECTION_DATA"]
            )
        return name_to_db

    def list(self) -> List[Database]:
        """
        Show list of integrations (databases) on server

        :return: list of Database objects
        """
        databases = self._list_databases()
        return list(databases.values())

    def create(
        self, name: str, engine: Union[str, Handler], connection_args: Dict
    ) -> Database:
        """
        Create new integration and return it

        :param name: Identifier for the integration to be created
        :param engine: Engine to be selected depending on the database connection.
        :param connection_args: {"key": "value"} object with the connection parameters specific for each engine
        :return: created Database object
        """
        if isinstance(engine, Handler):
            engine = engine.name

        ast_query = CreateDatabase(
            name=Identifier(name),
            engine=engine,
            parameters=connection_args,
        )
        self.api.sql_query(ast_query.to_string())
        return Database(self, name, engine=engine, params=connection_args)

    def drop(self, name: str):
        """
        Delete integration

        :param name: name of integration
        """
        ast_query = DropDatabase(name=Identifier(name))
        self.api.sql_query(ast_query.to_string())

    def get(self, name: str) -> Database:
        """
        Get integration by name

        :param name: name of integration
        :raises AttributeError: if the database does not exist
        :return: Database object
        """
        databases = self._list_databases()
        if name not in databases:
            raise AttributeError("Database doesn't exist")
        return databases[name]


@dataclass(init=False)
class Handler:
    """
    Metadata describing an installed handler.

    :meta private:
    """
    name: str
    title: str
    version: str
    description: str
    connection_args: dict
    import_success: bool
    import_error: str

    def __init__(self, **kwargs):
        # Keep only declared fields; silently ignore unknown columns
        # returned by the server.
        names = set([f.name for f in dataclasses.fields(self)])
        for k, v in kwargs.items():
            if k in names:
                setattr(self, k, v)


class Handlers(CollectionBase):
    """
    :meta private:
    """

    def __init__(self, api, type):
        self.api = api
        self.type = type

    def list(self) -> List[Handler]:
        """
        Returns list of handlers on server depending on type
        :return: list of handlers
        """

        ast_query = Show(
            category='HANDLERS',
            where=BinaryOperation(
                op='=',
                args=[
                    Identifier('type'),
                    Constant(self.type)
                ]
            )
        )

        df = self.api.sql_query(ast_query.to_string())
        # columns to lower case
        cols_map = {i: i.lower() for i in df.columns}
        df = df.rename(columns=cols_map)

        return [
            Handler(**item)
            for item in df.to_dict('records')
        ]

    def get(self, name: str) -> Handler:
        """
        Get handler by name

        :param name: name of the handler (matched case-insensitively)
        :return: handler object
        :raises AttributeError: if the handler does not exist
        """
        name = name.lower()
        for item in self.list():
            if item.name == name:
                return item
        raise AttributeError(f"Handler doesn't exist: {name}")


class MLHandlers(Handlers):
    """
    **ML handlers collection**

    Examples of usage:

    Get list

    >>> con.ml_handlers.list()

    Get

    >>> openai_handler = con.ml_handlers.openai
    >>> openai_handler = con.ml_handlers.get('openai')

    """

    ...


class DataHandlers(Handlers):
    """
    **DATA handlers collection**

    Examples of usage:

    Get list

    >>> con.data_handlers.list()

    Get

    >>> pg_handler = con.data_handlers.postgres
    >>> pg_handler = con.data_handlers.get('postgres')

    """

    ...
class Job:
    """A scheduled job in a MindsDB project: one or more queries run on a schedule."""

    def __init__(self, project, name, data=None, create_callback=None):
        self.project = project
        self.name = name
        self.data = data

        self.query_str = None
        if data is not None:
            self._update(data)
        self._queries = []
        # Set only while the job is pending creation (context-manager mode).
        self._create_callback = create_callback

    def _update(self, data):
        # Refresh cached fields from a server-side job record.
        # self.name = data['name']
        self.query_str = data['query']
        self.start_at = data['start_at']
        self.end_at = data['end_at']
        self.next_run_at = data['next_run_at']
        self.schedule_str = data['schedule_str']

    def __repr__(self):
        return f"{self.__class__.__name__}({self.name}, query='{self.query_str}')"

    def __enter__(self):
        # Context-manager mode is only valid for jobs not yet created on the server.
        if self._create_callback is None:
            raise ValueError("The job is already created and can't be used to create context."
                             " To be able to use context: create job without 'query_str' parameter: "
                             "\n>>> with con.jobs.create('j1') as job:"
                             "\n>>> job.add_query(...)")
        set_saving(f'job-{self.name}')

        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Fix: parameters renamed from (type, value, traceback) to avoid shadowing
        # builtins; __exit__ is invoked positionally, so this is safe.
        set_saving(None)
        if exc_type is None:
            if len(self._queries) == 0:
                raise ValueError('No queries were added to job')

            query_str = '; '.join(self._queries)

            self._create_callback(query_str)

            self.refresh()

    def refresh(self):
        """
        Retrieve job data from mindsdb server
        """
        job = self.project.get_job(self.name)
        self._update(job.data)

    def add_query(self, query: Union[Query, str]):
        """
        Add a query to job. Method is used in context of the job

        >>> with con.jobs.create('j1') as job:
        >>>     job.add_query(table1.insert(table2))

        :param query: string or Query object. Query.database should be empty or the same as job's project
        :raises ValueError: if the query targets another database or is not a Query/str
        """
        if isinstance(query, Query):

            if query.database is not None and query.database != self.project.name:
                # we can't execute this query in jobs project
                raise ValueError(f"Wrong query database: {query.database}. You could try to use sql string instead")

            query = query.sql
        elif not isinstance(query, str):
            # Fix: error message previously read 'Unable to use add this object'.
            raise ValueError(f'Unable to use this object as a query: {query}. Try to use sql string instead')
        self._queries.append(query)

    def get_history(self) -> pd.DataFrame:
        """
        Get history of job execution

        :return: dataframe with job executions
        """
        ast_query = Select(
            targets=[Star()],
            from_table=Identifier('jobs_history'),
            where=dict_to_binary_op({
                'name': self.name
            })
        )
        return self.project.api.sql_query(ast_query.to_string(), database=self.project.name)


class Jobs(CollectionBase):
    """Collection of jobs in a project."""

    def __init__(self, project, api):
        self.project = project
        self.api = api

    def _list(self, name: str = None) -> List[Job]:
        # Query the project's `jobs` table, optionally filtered by name.
        ast_query = Select(targets=[Star()], from_table=Identifier('jobs'))

        if name is not None:
            ast_query.where = dict_to_binary_op({'name': name})

        df = self.api.sql_query(ast_query.to_string(), database=self.project.name)

        # columns to lower case
        cols_map = {i: i.lower() for i in df.columns}
        df = df.rename(columns=cols_map)

        return [
            Job(self.project, item.pop('name'), item)
            for item in df.to_dict('records')
        ]

    def list(self) -> List[Job]:
        """
        Show list of jobs in project

        :return: list of Job objects
        """

        return self._list()

    def get(self, name: str) -> Job:
        """
        Get job by name from project

        :param name: name of the job
        :return: Job object
        :raises AttributeError: if the job doesn't exist
        :raises RuntimeError: if several jobs share the name
        """

        jobs = self._list(name)
        if len(jobs) == 1:
            return jobs[0]
        elif len(jobs) == 0:
            raise AttributeError("Job doesn't exist")
        else:
            raise RuntimeError("Several jobs with the same name")

    def create(
        self,
        name: str,
        query_str: str = None,
        start_at: dt.datetime = None,
        end_at: dt.datetime = None,
        repeat_str: str = None,
        repeat_min: int = None,
    ) -> Union[Job, None]:
        """
        Create new job in project and return it.

        If it is not possible (job executed and not accessible anymore):
        return None

        Usage options:

        Option 1: to use string query
        All job tasks could be passed as string with sql queries. Job is created immediately

        >>> job = con.jobs.create('j1', query_str='retrain m1; show models', repeat_min=1):

        Option 2: to use 'with' block.
        It allows to pass sdk commands to job tasks.
        Not all sdk commands could be accepted here,
        only those which are converted in to sql in sdk and sent to /query endpoint
        Adding query sql string is accepted as well
        Job will be created after exit from 'with' block

        >>> with con.jobs.create('j1', repeat_min=1) as job:
        >>>     job.add_query(table1.insert(table2))
        >>>     job.add_query('retrain m1') # using string

        More info about jobs: https://docs.mindsdb.com/sql/create/jobs

        :param name: name of the job
        :param query_str: str, job's query (or list of queries with ';' delimiter) which the job has to execute
        :param start_at: datetime, first start of job,
        :param end_at: datetime, when the job has to be stopped,
        :param repeat_str: str, optional, how to repeat job (e.g. '1 hour', '2 weeks', '3 min')
        :param repeat_min: int, optional, period to repeat the job in minutes (overrides repeat_str)
        :return: Job object or None
        """

        if start_at is not None:
            start_str = start_at.strftime("%Y-%m-%d %H:%M:%S")
        else:
            start_str = None

        if end_at is not None:
            end_str = end_at.strftime("%Y-%m-%d %H:%M:%S")
        else:
            end_str = None

        if repeat_min is not None:
            repeat_str = f'{repeat_min} minutes'

        def _create_callback(query):
            # Issue the CREATE JOB statement against the project database.
            ast_query = CreateJob(
                name=Identifier(name),
                query_str=query,
                start_str=start_str,
                end_str=end_str,
                repeat_str=repeat_str
            )

            self.api.sql_query(ast_query.to_string(), database=self.project.name)

        if query_str is None:
            # allow to create context with job
            job = Job(self.project, name, create_callback=_create_callback)
            return job
        else:
            # create it
            _create_callback(query_str)

            # the job may have already run and been removed if it is not repeatable
            jobs = self._list(name)
            if len(jobs) == 1:
                return jobs[0]

    def drop(self, name: str):
        """
        Drop job from project

        :param name: name of the job
        """
        ast_query = DropJob(Identifier(name))

        self.api.sql_query(ast_query.to_string(), database=self.project.name)
MAX_INSERT_SIZE = 1000


def split_data(data: Union[pd.DataFrame, list], partition_size: int) -> Iterable:
    """
    Yield consecutive chunks of *data*, each at most ``partition_size`` items long.

    Works for anything sliceable by integer ranges (list, DataFrame).
    Yields nothing for empty input.

    :param data: sliceable collection to be chunked
    :param partition_size: maximum number of items per chunk
    """
    for start in range(0, len(data), partition_size):
        yield data[start: start + partition_size]
    def __repr__(self):
        # e.g. KnowledgeBase(my_project.my_kb)
        return f'{self.__class__.__name__}({self.project.name}.{self.name})'

    def find(self, query: str, limit: int = 100):
        """
        Query data from knowledge base.
        Knowledge base should return the most relevant results for the query.

        >>> # query knowledge base
        >>> query = my_kb.find('dogs')
        >>> # fetch dataframe to client
        >>> print(query.fetch())

        :param query: text query
        :param limit: count of rows in result, default 100
        :return: Query object (a deep copy of this knowledge base with the search applied)
        """
        # Work on a copy so this object keeps its own query state unchanged.
        kb = copy.deepcopy(self)
        kb._query = query
        kb._limit = limit
        kb._update_query()

        return kb

    def _update_query(self):
        # Rebuild self.sql from the current search state:
        #   SELECT * FROM <project>.<kb> [WHERE content = '<query>'] [LIMIT <n>]
        ast_query = Select(
            targets=[Star()],
            from_table=self.table_name
        )
        if self._query is not None:
            # relevance search is expressed as an equality filter on the
            # reserved `content` column
            ast_query.where = BinaryOperation(op='=', args=[
                Identifier('content'),
                Constant(self._query)
            ])

        if self._limit is not None:
            ast_query.limit = Constant(self._limit)
        self.sql = ast_query.to_string()

    def insert_files(self, file_paths: List[str], params: dict = None):
        """
        Insert data from files to knowledge base.

        :param file_paths: list of paths to files to be inserted
        :param params: runtime parameters for the knowledge base, optional
        """
        data = {'files': file_paths}
        if params:
            data['params'] = params

        self.api.insert_into_knowledge_base(
            self.project.name,
            self.name,
            data=data
        )
0 = scrape given URLs only 150 | :param filters: Include only URLs that match these regex patterns 151 | :param limit: max count of pages to crawl 152 | :param params: Runtime parameters for KB 153 | """ 154 | data={ 155 | 'urls': urls, 156 | 'crawl_depth': crawl_depth, 157 | 'limit': limit, 158 | 'filters': [] if filters is None else filters, 159 | } 160 | if params: 161 | data['params'] = params 162 | self.api.insert_into_knowledge_base( 163 | self.project.name, 164 | self.name, 165 | data=data 166 | ) 167 | 168 | def insert(self, data: Union[pd.DataFrame, Query, dict, list], params: dict = None): 169 | """ 170 | Insert data to knowledge base 171 | 172 | >>> # using dataframe 173 | >>> my_kb.insert(pd.read_csv('house_sales.csv')) 174 | >>> # using dict 175 | >>> my_kb.insert({'type': 'house', 'date': '2020-02-02'}) 176 | 177 | If id is already exists in knowledge base: 178 | - it will be replaced 179 | - `id` column can be defined by id_column param, see create knowledge base 180 | 181 | :param data: Dataframe or Query object or dict. 
182 | :param params: Runtime parameters for KB 183 | """ 184 | 185 | if isinstance(data, Query): 186 | # for back compatibility 187 | return self.insert_query(data) 188 | 189 | if isinstance(data, dict): 190 | data = [data] 191 | elif isinstance(data, pd.DataFrame): 192 | for df in split_data(data, MAX_INSERT_SIZE): 193 | data = df.to_dict('records') 194 | self.insert(data, params=params) 195 | return 196 | elif not isinstance(data, list): 197 | raise ValueError("Unknown data type, accepted types: DataFrame, Query, dict, list") 198 | 199 | # chunking a big input data 200 | if len(data) > MAX_INSERT_SIZE: 201 | for chunk in split_data(data, MAX_INSERT_SIZE): 202 | self.insert(chunk, params=params) 203 | return 204 | 205 | data = {'rows': data} 206 | if params: 207 | data['params'] = params 208 | return self.api.insert_into_knowledge_base( 209 | self.project.name, 210 | self.name, 211 | data=data, 212 | ) 213 | 214 | def insert_query(self, data: Query, params: dict = None): 215 | """ 216 | Insert data to knowledge base using query 217 | 218 | >>> my_kb.insert(server.databases.example_db.tables.houses_sales.filter(type='house')) 219 | 220 | Data will be if id (defined by id_column param, see create knowledge base) is already exists in knowledge base 221 | it will be replaced 222 | 223 | :param data: Dataframe or Query object or dict. 
224 | :param params: Runtime parameters for KB 225 | """ 226 | if is_saving(): 227 | # generate insert from select query 228 | if data.database is not None: 229 | ast_query = Insert( 230 | table=self.table_name, 231 | from_select=query_to_native_query(data) 232 | ) 233 | sql = ast_query.to_string() 234 | else: 235 | sql = f'INSERT INTO {self.table_name.to_string()} ({data.sql})' 236 | 237 | # don't execute it right now, return query object 238 | return Query(self, sql, self.database) 239 | 240 | # query have to be in context of mindsdb project 241 | data = {'query': data.sql} 242 | if params: 243 | data['params'] = params 244 | self.api.insert_into_knowledge_base( 245 | self.project.name, 246 | self.name, 247 | data=data 248 | ) 249 | 250 | def completion(self, query, **data): 251 | data['query'] = query 252 | 253 | return self.api.knowledge_base_completion(self.project.name, self.name, data) 254 | 255 | 256 | class KnowledgeBases(CollectionBase): 257 | """ 258 | **Knowledge bases** 259 | 260 | Get list: 261 | 262 | >>> kb_list = server.knowledge_bases.list() 263 | >>> kb = kb_list[0] 264 | 265 | Get by name: 266 | 267 | >>> kb = server.knowledge_bases.get('my_kb') 268 | >>> # or : 269 | >>> kb = server.knowledge_bases.my_kb 270 | 271 | Create: 272 | 273 | >>> kb = server.knowledge_bases.create('my_kb') 274 | 275 | Drop: 276 | 277 | >>> server.knowledge_bases.drop('my_kb') 278 | 279 | """ 280 | 281 | def __init__(self, project, api): 282 | self.project = project 283 | self.api = api 284 | 285 | def list(self) -> List[KnowledgeBase]: 286 | """ 287 | 288 | Get list of knowledge bases inside of project: 289 | 290 | >>> kb_list = project.knowledge_bases.list() 291 | 292 | :return: list of knowledge bases 293 | """ 294 | 295 | return [ 296 | KnowledgeBase(self.api, self.project, item) 297 | for item in self.api.list_knowledge_bases(self.project.name) 298 | ] 299 | 300 | def get(self, name: str) -> KnowledgeBase: 301 | """ 302 | Get knowledge base by name 303 | 304 | 
:param name: name of the knowledge base 305 | :return: KnowledgeBase object 306 | """ 307 | 308 | data = self.api.get_knowledge_base(self.project.name, name) 309 | return KnowledgeBase(self.api, self.project, data) 310 | 311 | def create( 312 | self, 313 | name: str, 314 | model: Model = None, 315 | storage: Table = None, 316 | metadata_columns: list = None, 317 | content_columns: list = None, 318 | id_column: str = None, 319 | params: dict = None, 320 | ) -> Union[KnowledgeBase, Query]: 321 | """ 322 | 323 | Create knowledge base: 324 | 325 | >>> kb = server.knowledge_bases.create( 326 | ... 'my_kb', 327 | ... model=server.models.emb_model, 328 | ... storage=server.databases.pvec.tables.tbl1, 329 | ... metadata_columns=['date', 'author'], 330 | ... content_columns=['review', 'description'], 331 | ... id_column='number', 332 | ... params={'a': 1} 333 | ...) 334 | 335 | :param name: name of the knowledge base 336 | :param model: embedding model, optional. Default: 'sentence_transformers' will be used (defined in mindsdb server) 337 | :param storage: vector storage, optional. Default: chromadb database will be created 338 | :param metadata_columns: columns to use as metadata, optional. Default: all columns which are not content and id 339 | :param content_columns: columns to use as content, optional. Default: all columns except id column 340 | :param id_column: the column to use as id, optinal. 
Default: 'id', if exists 341 | :param params: other parameters to knowledge base 342 | :return: created KnowledgeBase object 343 | """ 344 | 345 | params_out = {} 346 | 347 | if metadata_columns is not None: 348 | params_out['metadata_columns'] = metadata_columns 349 | 350 | if content_columns is not None: 351 | params_out['content_columns'] = content_columns 352 | 353 | if id_column is not None: 354 | params_out['id_column'] = id_column 355 | 356 | if params is not None: 357 | params_out.update(params) 358 | 359 | if model is not None: 360 | model = model.name 361 | 362 | payload = { 363 | 'name': name, 364 | 'model': model, 365 | 'params': params_out 366 | } 367 | 368 | if storage is not None: 369 | payload['storage'] = { 370 | 'database': storage.db.name, 371 | 'table': storage.name 372 | } 373 | 374 | self.api.create_knowledge_base(self.project.name, data=payload) 375 | 376 | return self.get(name) 377 | 378 | def drop(self, name: str): 379 | """ 380 | 381 | :param name: 382 | :return: 383 | """ 384 | 385 | return self.api.delete_knowledge_base(self.project.name, name) 386 | -------------------------------------------------------------------------------- /mindsdb_sdk/ml_engines.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import List, Union 3 | 4 | from mindsdb_sql_parser.ast import Show, Identifier 5 | from mindsdb_sql_parser.ast.mindsdb import CreateMLEngine, DropMLEngine 6 | 7 | from mindsdb_sdk.utils.objects_collection import CollectionBase 8 | 9 | from .handlers import Handler 10 | 11 | @dataclass 12 | class MLEngine: 13 | """ 14 | :meta private: 15 | """ 16 | name: str 17 | handler: str 18 | connection_data: dict 19 | 20 | 21 | class MLEngines(CollectionBase): 22 | """ 23 | 24 | **ML engines collection** 25 | 26 | Examples of usage: 27 | 28 | Get list 29 | 30 | >>> ml_engines = con.ml_engines.list() 31 | 32 | Get 33 | 34 | >>> openai_engine = 
con.ml_engines.openai1 35 | 36 | Create 37 | 38 | >>> con.ml_engines.create( 39 | ... 'openai1', 40 | ... 'openai', 41 | ... connection_data={'api_key': '111'} 42 | ...) 43 | 44 | Drop 45 | 46 | >>> con.ml_engines.drop('openai1') 47 | 48 | Upload BYOM model. 49 | After uploading a new ml engin will be availbe to create new model from it. 50 | 51 | >>> model_code = open('/path/to/model/code').read() 52 | >>> model_requirements = open('/path/to/model/requirements').read() 53 | >>> ml_engine = con.ml_engines.create_byom( 54 | ... 'my_byom_engine', 55 | ... code=model_code, 56 | ... requirements=model_requirements 57 | ...) 58 | 59 | """ 60 | 61 | def __init__(self, api): 62 | self.api = api 63 | 64 | def list(self) -> List[MLEngine]: 65 | """ 66 | Returns list of ml engines on server 67 | 68 | :return: list of ml engines 69 | """ 70 | 71 | ast_query = Show(category='ml_engines') 72 | 73 | df = self.api.sql_query(ast_query.to_string()) 74 | # columns to lower case 75 | cols_map = {i: i.lower() for i in df.columns} 76 | df = df.rename(columns=cols_map) 77 | 78 | return [ 79 | MLEngine(**item) 80 | for item in df.to_dict('records') 81 | ] 82 | 83 | def get(self, name: str) -> MLEngine: 84 | """ 85 | Get ml engine by name 86 | 87 | :param name 88 | :return: ml engine object 89 | """ 90 | name = name.lower() 91 | for item in self.list(): 92 | if item.name == name: 93 | return item 94 | raise AttributeError(f"MLEngine doesn't exist {name}") 95 | 96 | def create(self, name: str, handler: Union[str, Handler], connection_data: dict = None) -> MLEngine: 97 | """ 98 | Create new ml engine and return it 99 | 100 | :param name: ml engine name, string 101 | :param handler: handler name, string or Handler 102 | :param connection_data: parameters for ml engine, dict, optional 103 | :return: created ml engine object 104 | """ 105 | 106 | if isinstance(handler, Handler): 107 | handler = handler.name 108 | 109 | ast_query = CreateMLEngine(Identifier(name), handler, 
params=connection_data) 110 | 111 | self.api.sql_query(ast_query.to_string()) 112 | 113 | return MLEngine(name, handler, connection_data) 114 | 115 | def create_byom(self, name: str, code: str, requirements: Union[str, List[str]] = None): 116 | """ 117 | Create new BYOM ML engine and return it 118 | 119 | :param code: model python code in string 120 | :param requirements: requirements for model. Optional if there is no special requirements. 121 | It can be content of 'requirement.txt' file or list of strings (item for every requirement). 122 | :return: created BYOM ml engine object 123 | """ 124 | 125 | if requirements is None: 126 | requirements = '' 127 | elif isinstance(requirements, list): 128 | requirements = '\n'.join(requirements) 129 | 130 | self.api.upload_byom(name, code, requirements) 131 | 132 | return MLEngine(name, 'byom', {}) 133 | 134 | def drop(self, name: str): 135 | """ 136 | Drop ml engine by name 137 | 138 | :param name: name 139 | """ 140 | ast_query = DropMLEngine(Identifier(name)) 141 | 142 | self.api.sql_query(ast_query.to_string()) 143 | 144 | -------------------------------------------------------------------------------- /mindsdb_sdk/models.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import time 4 | from typing import List, Union 5 | 6 | import pandas as pd 7 | 8 | from mindsdb_sql_parser.ast.mindsdb import CreatePredictor, DropPredictor 9 | from mindsdb_sql_parser.ast.mindsdb import RetrainPredictor, FinetunePredictor 10 | from mindsdb_sql_parser.ast import Identifier, Select, Star, Join, Describe, Set 11 | from mindsdb_sql_parser import parse_sql 12 | from mindsdb_sql_parser.exceptions import ParsingException 13 | 14 | from .ml_engines import MLEngine 15 | 16 | from mindsdb_sdk.utils.objects_collection import CollectionBase 17 | from mindsdb_sdk.utils.sql import dict_to_binary_op, query_to_native_query 18 | from mindsdb_sdk.utils.context import 
is_saving 19 | 20 | from .query import Query 21 | 22 | 23 | class Model: 24 | """ 25 | 26 | Versions 27 | 28 | List model versions 29 | 30 | >>> model.list_versions() 31 | 32 | 33 | Get info 34 | 35 | >>> print(model.get_status()) 36 | >>> print(model.data) 37 | 38 | Update model data from server 39 | 40 | >>> model.refresh() 41 | 42 | **Usng model** 43 | 44 | Dataframe on input 45 | 46 | >>> result_df = model.predict(df_rental) 47 | >>> result_df = model.predict(df_rental, params={'a': 'q'}) 48 | 49 | Dict on input 50 | 51 | >>> result_df = model.predict({'n_rooms': 2}) 52 | 53 | Deferred query on input 54 | 55 | >>> result_df = model.predict(query, params={'': ''}) 56 | 57 | Time series prediction 58 | 59 | >>> query = database.query('select * from table1 where type="house" and saledate>latest') 60 | >>> model.predict(query) 61 | 62 | The join model with table in raw query 63 | 64 | >>> result_df = project.query(''' 65 | ... SELECT m.saledate as date, m.ma as forecast 66 | ... FROM mindsdb.house_sales_model as m 67 | ... JOIN example_db.demo_data.house_sales as t 68 | ... WHERE t.saledate > LATEST AND t.type = 'house' 69 | ... AND t.bedrooms=2 70 | ... 
    def __init__(self, project, data):
        # Project the model belongs to.
        self.project = project

        # Raw model metadata as returned by the server ('name', 'status', 'error', ...).
        self.data = data
        self.name = data['name']
        # None means "active version"; the ModelVersion subclass sets a concrete number.
        self.version = None

    def __repr__(self):
        # e.g. Model(my_model, version=2, status=complete)
        version = ''
        if self.version is not None:
            version = f', version={self.version}'
        return f'{self.__class__.__name__}({self.name}{version}, status={self.data.get("status")})'

    def _get_identifier(self):
        # Fully qualified identifier: <project>.<model>[.<version>]
        parts = [self.project.name, self.name]
        if self.version is not None:
            parts.append(str(self.version))
        return Identifier(parts=parts)

    def predict(self, data: Union[pd.DataFrame, Query, dict], params: dict = None) -> Union[pd.DataFrame, Query]:
        """
        Make prediction using model

        if data is dataframe
        it uses /model/predict http method and sends dataframe over it

        if data is select query with one table
        it replaces table to join table and predictor and sends query over sql/query http method

        if data is select from join or other complex query it modifies query to:
        'select from (input query) join model' and sends it over sql/query http method

        :param data: dataframe or Query object as input to predictor
        :param params: parameters for predictor, optional
        :return: dataframe with result of prediction
        """

        if isinstance(data, Query):
            # try to parse; an unparseable query falls back to the native-query path below
            try:
                ast_query = parse_sql(data.sql, dialect='mindsdb')
            except ParsingException:
                ast_query = None

            # NOTE: automatic injection of the model JOIN into a simple SELECT is
            # disabled for now; the input query is wrapped into a subselect instead.

            model_identifier = self._get_identifier()
            model_identifier.alias = Identifier('m')

            if data.database is not None or ast_query is None or not isinstance(ast_query, Select):
                # use native query: SELECT m.* FROM (native query) t JOIN model m
                native_query = query_to_native_query(data)
                native_query.parentheses = True
                native_query.alias = Identifier('t')
                upper_query = Select(
                    targets=[Identifier(parts=['m', Star()])],
                    from_table=Join(
                        join_type='join',
                        left=native_query,
                        right=model_identifier
                    )
                )
            else:
                # wrap query into a subselect and join it with the model
                model_identifier = self._get_identifier()
                model_identifier.alias = Identifier('m')

                ast_query.parentheses = True
                ast_query.alias = Identifier('t')

                upper_query = Select(
                    targets=[Identifier(parts=['m', Star()])],
                    from_table=Join(
                        join_type='join',
                        left=ast_query,
                        right=model_identifier
                    )
                )
            if params is not None:
                upper_query.using = params
            # execute in query's database
            sql = upper_query.to_string()
            if is_saving():
                # inside a 'with job' block: defer execution, return a Query object
                return Query(self, sql)

            return self.project.api.sql_query(sql, database=None)

        elif isinstance(data, dict):
            # single row: wrap into a one-row dataframe and use the predict endpoint
            data = pd.DataFrame([data])
            return self.project.api.model_predict(self.project.name, self.name, data,
                                                  params=params, version=self.version)
        elif isinstance(data, pd.DataFrame):
            return self.project.api.model_predict(self.project.name, self.name, data,
                                                  params=params, version=self.version)
        else:
            raise ValueError('Unknown input')

    def wait_complete(self):
        """
        Poll model status until training finishes.

        Checks roughly every 0.3s, up to 400 times (~2 minutes).
        Raises RuntimeError if training ended with an error.
        NOTE(review): if the model is still 'training' after the last poll, the
        method returns silently — callers may want to re-check get_status().
        """

        for i in range(400):
            time.sleep(0.3)

            status = self.get_status()
            if status in ('generating', 'training'):
                continue
            elif status == 'error':
                raise RuntimeError(f'Training failed: {self.data["error"]}')
            else:
                break

    def get_status(self) -> str:
        """
        Refresh model data and return status of model

        :return: model status
        """
        self.refresh()
        return self.data['status']

    def refresh(self):
        """
        Refresh model data from mindsdb server.
        Model data can be changed during training process.

        :return: model data
        """
        model = self.project.get_model(self.name, self.version)
        self.data = model.data
        return self.data
    def retrain(self,
                query: Union[str, Query] = None,
                database: str = None,
                options: dict = None,
                engine: str = None) -> Union[Model, ModelVersion]:
        """
        Call retrain of the model

        :param query: sql string or Query object to get data for retraining, optional
        :param database: database to get data for retraining, optional
        :param options: parameters for retraining model, optional
        :param engine: ml engine, optional
        :return: Model object
        """
        return self._retrain(ast_class=RetrainPredictor,
                             query=query, database=database,
                             options=options, engine=engine)

    def _retrain(self,
                 ast_class,
                 query: Union[str, Query] = None,
                 database: str = None,
                 options: dict = None,
                 engine: str = None):
        # Shared implementation for retrain()/finetune():
        # builds a RETRAIN/FINETUNE statement and executes it (or defers it in saving mode).
        if isinstance(query, Query):
            # a deferred query carries its own database
            database = query.database
            query = query.sql
        elif isinstance(query, pd.DataFrame):
            raise NotImplementedError('Dataframe as input for training model is not supported yet')

        if database is not None:
            database = Identifier(database)

        if options is None:
            options = {}
        if engine is not None:
            options['engine'] = engine

        ast_query = ast_class(
            name=self._get_identifier(),
            query_str=query,
            integration_name=database,
            using=options or None,
        )
        sql = ast_query.to_string()

        if is_saving():
            # inside a 'with job' block: defer execution
            return Query(self, sql)

        data = self.project.api.sql_query(sql)
        # NOTE(review): sql_query appears to return a DataFrame elsewhere in this
        # file (see Models.create, which uses df.iloc[0]); iterating .items() here
        # would yield (column, Series) pairs rather than a row dict — verify.
        data = {k.lower(): v for k, v in data.items()}

        # return a new instance of the same class (Model or ModelVersion)
        base_class = self.__class__
        return base_class(self.project, data)

    def describe(self, type: str = None) -> Union[pd.DataFrame, Query]:
        """
        Return description of the model

        :param type: describe type (for lightwood is models, ensemble, features), optional
        :return: dataframe with result of description
        """
        # describing a specific version is not supported by the server
        if self.version is not None:
            raise NotImplementedError

        identifier = self._get_identifier()
        if type is not None:
            # DESCRIBE <project>.<model>.<type>
            identifier.parts.append(type)
        ast_query = Describe(identifier)

        sql = ast_query.to_string()
        if is_saving():
            return Query(self, sql)

        return self.project.api.sql_query(sql)

    def list_versions(self) -> List[ModelVersion]:
        """
        Show list of model versions

        :return: list of ModelVersion objects
        """
        return self.project.list_models(with_versions=True, name=self.name)

    def get_version(self, num: int) -> ModelVersion:
        """
        Get model version by number

        :param num: version number
        :return: ModelVersion object
        :raises ValueError: if the version is not found
        """

        num = int(num)
        for m in self.project.list_models(with_versions=True, name=self.name):
            if m.version == num:
                return m
        raise ValueError('Version is not found')

    def drop_version(self, num: int) -> ModelVersion:
        """
        Drop version of the model

        >>> models.rentals_model.drop_version(version=10)

        :param num: version to drop
        """

        return self.project.drop_model_version(self.name, num)
class ModelVersion(Model):
    """
    A concrete version of a model; `version` is always set.
    """

    def __init__(self, project, data):

        super().__init__(project, data)

        self.version = data['version']


class Models(CollectionBase):
    """

    **Models**

    Get:

    >>> all_models = models.list()
    >>> model = all_models[0]

    Get version:

    >>> all_models = models.list(with_versions=True)
    >>> model = all_models[0]

    By name:

    >>> model = models.get('model1')
    >>> model = models.get('model1', version=2)

    """

    def __init__(self, project, api):
        self.project = project
        self.api = api

    def create(
        self,
        name: str,
        predict: str = None,
        engine: Union[str, MLEngine] = None,
        query: Union[str, Query] = None,
        database: str = None,
        options: dict = None,
        timeseries_options: dict = None, **kwargs
    ) -> Union[Model, Query]:
        """
        Create new model in project and return it

        If query/database is passed, it will be executed on mindsdb side

        Create, using params and query as string

        >>> model = models.create(
        ...   'rentals_model',
        ...   predict='price',
        ...   engine='lightwood',
        ...   database='example_db',
        ...   query='select * from table',
        ...   options={
        ...       'module': 'LightGBM'
        ...   },
        ...   timeseries_options={
        ...       'order': 'date',
        ...       'group': ['a', 'b']
        ...   }
        ...)

        Create, using deferred query. 'query' will be executed and converted to dataframe on mindsdb backend.

        >>> query = databases.db.query('select * from table')
        >>> model = models.create(
        ...   'rentals_model',
        ...   predict='price',
        ...   query=query,
        ...)

        :param name: name of the model
        :param predict: prediction target
        :param engine: ml engine for new model, default is mindsdb
        :param query: sql string or Query object to get data for training of model, optional
        :param database: database to get data for training, optional
        :param options: parameters for model, optional
        :param timeseries_options: parameters for forecasting model
        :return: created Model object, it can still be in training state
        :raises RuntimeError: if the server returned no record for the created model
        """
        if isinstance(query, Query):
            # a deferred query carries its own database
            database = query.database
            query = query.sql
        elif isinstance(query, pd.DataFrame):
            raise NotImplementedError('Dataframe as input for training model is not supported yet')

        if database is not None:
            database = Identifier(database)

        targets = [Identifier(predict)] if predict is not None else None

        ast_query = CreatePredictor(
            name=Identifier(parts=[self.project.name, name]),
            query_str=query,
            integration_name=database,
            targets=targets,
        )

        if timeseries_options is not None:
            # validate time series options before applying them
            allowed_keys = ['group', 'order', 'window', 'horizon']
            for key in timeseries_options.keys():
                if key not in allowed_keys:
                    raise AttributeError(f"Unexpected time series option: {key}")

            if 'group' in timeseries_options:
                group = timeseries_options['group']
                if not isinstance(group, list):
                    group = [group]
                ast_query.group_by = [Identifier(i) for i in group]
            if 'order' in timeseries_options:
                ast_query.order_by = [Identifier(timeseries_options['order'])]
            if 'window' in timeseries_options:
                ast_query.window = timeseries_options['window']
            if 'horizon' in timeseries_options:
                ast_query.horizon = timeseries_options['horizon']

        if options is None:
            options = {}
        # options and kwargs are treated the same: both go into USING
        options.update(kwargs)

        if engine is not None:
            if isinstance(engine, MLEngine):
                engine = engine.name

            options['engine'] = engine
        ast_query.using = options

        sql = ast_query.to_string()

        if is_saving():
            # inside a 'with job' block: defer execution
            return Query(self, sql)

        df = self.project.api.sql_query(sql)
        if len(df) == 0:
            # previously an empty result fell through silently; fail loudly instead
            raise RuntimeError(f"Model creation returned no data: {name}")
        data = dict(df.iloc[0])
        # normalize column names to lowercase
        data = {k.lower(): v for k, v in data.items()}

        return Model(self.project, data)

    def get(self, name: str, version: int = None) -> Union[Model, ModelVersion]:
        """
        Get model by name from project

        If version is passed it returns a ModelVersion object with that specific version

        :param name: name of the model
        :param version: version of model, optional
        :return: Model or ModelVersion object
        :raises AttributeError: if the model doesn't exist
        :raises RuntimeError: if several models match the same name/version
        """
        if version is not None:
            ret = self.list(with_versions=True, name=name, version=version)
        else:
            ret = self.list(name=name)
        if len(ret) == 0:
            raise AttributeError("Model doesn't exist")
        elif len(ret) == 1:
            return ret[0]
        else:
            raise RuntimeError('Several models with the same name/version')

    def drop(self, name: str):
        """
        Drop model from project with all versions

        >>> models.drop('rentals_model')

        :param name: name of the model
        """
        ast_query = DropPredictor(name=Identifier(parts=[self.project.name, name]))
        sql = ast_query.to_string()
        if is_saving():
            return Query(self, sql)

        self.project.api.sql_query(sql)

    def list(self, with_versions: bool = False,
             name: str = None,
             version: int = None) -> List[Union[Model, ModelVersion]]:
        """
        List models (or model versions) in project

        If with_versions = True
        it shows all models with version (executes 'select * from models_versions')

        Otherwise it shows only models (executes 'select * from models')

        :param with_versions: show model versions
        :param name: to show models or versions only with selected name, optional
        :param version: to show model or versions only with selected version, optional
        :return: list of Model or ModelVersion objects
        """

        model_class = Model

        filters = {}
        if name is not None:
            filters['NAME'] = name
        if version is not None:
            filters['VERSION'] = version

        if with_versions:
            model_class = ModelVersion
        else:
            # only active versions are shown by default
            filters['ACTIVE'] = '1'

        ast_query = Select(
            targets=[Star()],
            from_table=Identifier('models'),
            where=dict_to_binary_op(filters)
        )
        df = self.project.query(ast_query.to_string()).fetch()

        # normalize column names to lower case
        cols_map = {i: i.lower() for i in df.columns}
        df = df.rename(columns=cols_map)

        return [
            model_class(self.project, item)
            for item in df.to_dict('records')
        ]
    def __init__(self, server, api, name):
        """
        :param server: Server instance this project belongs to
        :param api: REST API connector used to run queries on the server
        :param name: name of the project on the mindsdb server
        """
        self.name = name
        self.api = api
        self.server = server

        self.models = Models(self, api)

        # old api: method aliases kept for backward compatibility
        self.get_model = self.models.get
        self.list_models = self.models.list
        self.create_model = self.models.create
        self.drop_model = self.models.drop

        self.views = Views(self, api)

        # old api
        self.get_view = self.views.get
        self.list_views = self.views.list
        self.create_view = self.views.create
        self.drop_view = self.views.drop

        self.jobs = Jobs(self, api)

        # old api
        self.get_job = self.jobs.get
        self.list_jobs = self.jobs.list
        self.create_job = self.jobs.create
        self.drop_job = self.jobs.drop

        self.knowledge_bases = KnowledgeBases(self, api)

        self.skills = Skills(self, api)
        self.agents = Agents(self, api)

        self.minds = self.agents  # alias: minds are served by the agents collection
| """ 95 | return Query(self.api, sql, database=self.name) 96 | 97 | def drop_model_version(self, name: str, version: int): 98 | """ 99 | Drop version of the model 100 | 101 | :param name: name of the model 102 | :param version: version to drop 103 | """ 104 | ast_query = DropPredictor(Identifier(parts=[name, str(version)])) 105 | 106 | self.query(ast_query.to_string()).fetch() 107 | 108 | 109 | class Projects(CollectionBase): 110 | """ 111 | Projects 112 | ---------- 113 | 114 | list of projects 115 | 116 | >>> projects.list() 117 | 118 | create 119 | 120 | >>> project = projects.create('proj') 121 | 122 | drop 123 | 124 | >>> projects.drop('proj') 125 | 126 | get existing 127 | 128 | >>> projects.get('proj') 129 | 130 | by attribute 131 | >>> projects.proj 132 | 133 | """ 134 | 135 | def __init__(self, server, api): 136 | self.api = api 137 | self.server = server 138 | 139 | def _list_projects(self): 140 | data = self.api.sql_query("select NAME from information_schema.databases where TYPE='project'") 141 | return list(data.NAME) 142 | 143 | def list(self) -> List[Project]: 144 | """ 145 | Show list of project on server 146 | 147 | :return: list of Project objects 148 | """ 149 | # select * from information_schema.databases where TYPE='project' 150 | return [Project(self.server, self.api, name) for name in self._list_projects()] 151 | 152 | def get(self, name: str = 'mindsdb') -> Project: 153 | """ 154 | Get Project by name 155 | 156 | :param name: name of project 157 | :return: Project object 158 | """ 159 | if name not in self._list_projects(): 160 | raise AttributeError("Project doesn't exist") 161 | return Project(self.server, self.api, name) 162 | 163 | def create(self, name: str) -> Project: 164 | """ 165 | Create new project and return it 166 | 167 | :param name: name of the project 168 | :return: Project object 169 | """ 170 | 171 | ast_query = CreateDatabase( 172 | name=Identifier(name), 173 | engine='mindsdb', 174 | parameters={} 175 | ) 176 | 177 | 
class Query:
    """
    Wrapper around a SQL string bound to an api connector.

    Holds the query text and, optionally, the database it should run against.
    Nothing is sent to the server until :meth:`fetch` is called.
    """

    def __init__(self, api, sql, database=None):
        self.api = api

        self.sql = sql
        self.database = database

    def __repr__(self):
        # one-line preview of the query, truncated to 40 characters
        preview = self.sql.replace('\n', ' ')
        preview = preview if len(preview) <= 40 else preview[:37] + '...'

        return f'{self.__class__.__name__}({preview})'

    def fetch(self) -> pd.DataFrame:
        """
        Execute the query on the mindsdb server and return its result.

        :return: dataframe with the result set
        """
        return self.api.sql_query(self.sql, self.database)
    def __init__(self, api, skills: Skills = None, agents: Agents = None):
        """
        :param api: REST API connector to the mindsdb server
        :param skills: NOTE(review): accepted but never read in this constructor —
            Project.__init__ builds its own Skills; confirm whether it can be dropped
        :param agents: NOTE(review): likewise accepted but unused here
        """
        # server is also mindsdb project: set up server-level collections first,
        # then initialise the base Project (named 'mindsdb') with itself as server
        self.databases = Databases(api)
        self.ml_engines = MLEngines(api)
        super().__init__(self, api, 'mindsdb')

        self.projects = Projects(self, api)

        # old api: method aliases kept for backward compatibility
        self.get_project = self.projects.get
        self.list_projects = self.projects.list
        self.create_project = self.projects.create
        self.drop_project = self.projects.drop

        # old api
        self.get_database = self.databases.get
        self.list_databases = self.databases.list
        self.create_database = self.databases.create
        self.drop_database = self.databases.drop

        self.ml_handlers = Handlers(self.api, 'ml')
        self.data_handlers = Handlers(self.api, 'data')
It could content version 54 | Example of getting version for local: 55 | 56 | >>> print(server.status()['mindsdb_version']) 57 | 58 | :return: server status info 59 | """ 60 | return self.api.status() 61 | 62 | def __repr__(self): 63 | return f'{self.__class__.__name__}({self.api.url})' 64 | 65 | 66 | -------------------------------------------------------------------------------- /mindsdb_sdk/skills.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | from mindsdb_sdk.utils.objects_collection import CollectionBase 4 | 5 | 6 | class Skill(): 7 | """Represents a MindsDB skill 8 | 9 | Working with skills: 10 | 11 | Get a skill by name: 12 | 13 | >>> skill = skills.get('my_skill') 14 | 15 | List all skills: 16 | 17 | >>> skills = skills.list() 18 | 19 | Create a new SQL skill: 20 | 21 | >>> text_to_sql_skill = skills.create('text_to_sql', 'sql', { 'tables': ['my_table'], 'database': 'my_database', 'description': 'my_description'}) 22 | 23 | Update a skill: 24 | 25 | >>> skill.params = { 'tables': ['new_table'], 'database': 'new_database' } 26 | >>> updated_skill = skills.update('my_skill', skill) 27 | 28 | Delete a skill by name 29 | 30 | >>> skills.delete('my_skill') 31 | """ 32 | def __init__( 33 | self, 34 | name: str, 35 | type: str, 36 | params: dict = None): 37 | self.name = name 38 | self.type = type 39 | self.params = params or {} 40 | 41 | def __eq__(self, other): 42 | if self.name != other.name: 43 | return False 44 | if self.type != other.type: 45 | return False 46 | return self.params == other.params 47 | 48 | def __repr__(self): 49 | return f'{self.__class__.__name__}(name: {self.name})' 50 | 51 | @classmethod 52 | def from_json(cls, json: dict): 53 | name = json['name'] 54 | type = json['type'] 55 | params = json['params'] 56 | if json['type'] == 'sql': 57 | return SQLSkill(name, params['tables'], params['database'], params.get('description', '')) 58 | if json['type'] == 'retrieval': 59 
class SQLSkill(Skill):
    """A skill that lets an agent run text-to-SQL over MindsDB databases."""

    def __init__(self, name: str, tables: List[str], database: str, description: str):
        super().__init__(
            name,
            'sql',
            {
                'database': database,
                'tables': tables,
                'description': description
            }
        )
101 | 102 | :param name: name of the skill 103 | 104 | :return: skill with the given name 105 | """ 106 | data = self.api.skill(self.project.name, name) 107 | return Skill.from_json(data) 108 | 109 | def create(self, name: str, type: str, params: dict = None) -> Skill: 110 | """ 111 | Create new skill and return it 112 | 113 | :param name: Name of the skill to be created 114 | :param type: Type of the skill to be created 115 | :param params: Parameters for the skill to be created 116 | 117 | :return: created skill object 118 | """ 119 | _ = self.api.create_skill(self.project.name, name, type, params) 120 | if type == 'sql': 121 | return SQLSkill(name, params['tables'], params['database'], params['description']) 122 | return Skill(name, type, params) 123 | 124 | def update(self, name: str, updated_skill: Skill) -> Skill: 125 | """ 126 | Update a skill by name. 127 | 128 | param name: Name of the skill to be updated 129 | :param updated_skill: Skill with updated fields 130 | 131 | :return: updated skillobject 132 | """ 133 | data = self.api.update_skill(self.project.name, name, updated_skill.name, updated_skill.type, updated_skill.params) 134 | return Skill.from_json(data) 135 | 136 | def drop(self, name: str): 137 | """ 138 | Drop a skill by name. 
    def __init__(self, db, name):
        """
        :param db: Database object this table belongs to
        :param name: name of the table inside that database
        """
        # initialise the base Query with an empty sql string and no database;
        # the actual sql is generated from the filter state by _update_query()
        super().__init__(db.api, '', None)
        self.name = name
        self.table_name = Identifier(parts=[db.name, name])
        self.db = db
        self._filters = {}          # column -> value equality filters (see filter())
        self._limit = None          # row limit, set via limit()
        self._track_column = None   # column used by track() ('LAST' keyword)
        self._update_query()
query._update_query() 58 | return query 59 | 60 | def limit(self, val: int): 61 | """ 62 | Applies limit condition to table query 63 | 64 | :param val: limit size 65 | :return: Table object 66 | """ 67 | query = copy.deepcopy(self) 68 | query._limit = val 69 | query._update_query() 70 | return query 71 | 72 | def track(self, column): 73 | """ 74 | Apply tracking column to table. ('LAST' keyword in mindsdb) 75 | First call returns nothing 76 | The next calls return new records since previous call (where value of track_column is greater) 77 | 78 | Example: 79 | 80 | >>> query = con.databases.my_db.tables.sales.filter(type='house').track('created_at') 81 | >>> # first call returns no records 82 | >>> df = query.fetch() 83 | >>> # second call returns rows with created_at is greater since previous fetch 84 | >>> df = query.fetch() 85 | 86 | :param column: column to track new data from table. 87 | :return: Table object 88 | """ 89 | query = copy.deepcopy(self) 90 | query._track_column = column 91 | 92 | query._update_query() 93 | return query 94 | 95 | def _update_query(self): 96 | where = dict_to_binary_op(self._filters) 97 | if self._track_column is not None: 98 | condition = BinaryOperation(op='>', args=[Identifier(self._track_column), Last()]) 99 | where = add_condition(where, condition) 100 | 101 | ast_query = Select( 102 | targets=[Star()], 103 | from_table=self.table_name, 104 | where=where 105 | ) 106 | if self._limit is not None: 107 | ast_query.limit = Constant(self._limit) 108 | self.sql = ast_query.to_string() 109 | 110 | def insert(self, query: Union[pd.DataFrame, Query]): 111 | """ 112 | Insert data from query of dataframe 113 | :param query: dataframe of 114 | :return: 115 | """ 116 | 117 | if isinstance(query, pd.DataFrame): 118 | # insert data 119 | data_split = query.to_dict('split') 120 | 121 | ast_query = Insert( 122 | table=self.table_name, 123 | columns=data_split['columns'], 124 | values=data_split['data'] 125 | ) 126 | 127 | sql = 
ast_query.to_string() 128 | 129 | elif isinstance(query, Query): 130 | # insert from select 131 | 132 | if query.database is not None: 133 | # use native query 134 | ast_query = Insert( 135 | table=self.table_name, 136 | from_select=query_to_native_query(query) 137 | ) 138 | sql = ast_query.to_string() 139 | else: 140 | sql = f'INSERT INTO {self.table_name.to_string()} ({query.sql})', 141 | else: 142 | raise ValueError(f'Invalid query type: {query}') 143 | 144 | if is_saving(): 145 | return Query(self, sql) 146 | 147 | self.api.sql_query(sql) 148 | 149 | def delete(self, **kwargs): 150 | """ 151 | Deletes record from table using filters 152 | 153 | >>> table.delete(a=1, b=2) 154 | 155 | :param kwargs: filter 156 | """ 157 | 158 | ast_query = Delete( 159 | table=self.table_name, 160 | where=dict_to_binary_op(kwargs) 161 | ) 162 | sql = ast_query.to_string() 163 | 164 | if is_saving(): 165 | return Query(self, sql) 166 | 167 | self.api.sql_query(sql) 168 | 169 | def update(self, values: Union[dict, Query], on: list = None, filters: dict = None): 170 | ''' 171 | Update table by condition of from other table. 172 | 173 | If 'values' is a dict: 174 | it will be an update by condition 175 | 'filters' is required 176 | used command: update table set a=1 where x=1 177 | 178 | If 'values' is a Query: 179 | it will be an update from select 180 | 'on' is required 181 | used command: update table on a,b from (query) 182 | 183 | :param values: input for update, can be dict or query 184 | :param on: list of column to map subselect to table ['a', 'b', ...] 
185 | :param filters: dict to filter updated rows, {'column': 'value', ...} 186 | 187 | ''' 188 | 189 | if isinstance(values, Query): 190 | # is update from select 191 | if on is None: 192 | raise ValueError('"on" parameter is required for update from query') 193 | 194 | # insert from select 195 | if values.database is not None: 196 | ast_query = Update( 197 | table=self.table_name, 198 | keys=[Identifier(col) for col in on], 199 | from_select=query_to_native_query(values) 200 | ) 201 | sql = ast_query.to_string() 202 | else: 203 | map_cols = ', '.join(on) 204 | sql = f'UPDATE {self.table_name.to_string()} ON {map_cols} FROM ({values.sql})' 205 | 206 | elif isinstance(values, dict): 207 | # is regular update 208 | if filters is None: 209 | raise ValueError('"filters" parameter is required for update') 210 | 211 | update_columns = { 212 | k: Constant(v) 213 | for k, v in values.items() 214 | } 215 | 216 | ast_query = Update( 217 | table=self.table_name, 218 | update_columns=update_columns, 219 | where=dict_to_binary_op(filters) 220 | ) 221 | 222 | sql = ast_query.to_string() 223 | else: 224 | raise NotImplementedError 225 | 226 | if is_saving(): 227 | return Query(self, sql) 228 | 229 | self.api.sql_query(sql) 230 | 231 | 232 | class Tables(CollectionBase): 233 | """ 234 | Wortking with tables: 235 | Get table as Query object 236 | 237 | >>> table = tables.get('table1') 238 | 239 | Filter and limit 240 | 241 | >>> table = table.filter(a=1, b='2') 242 | >>> table = table.limit(3) 243 | 244 | Get content of table as dataframe. 
At that moment query will be sent on server and executed 245 | 246 | >>> df = table.fetch() 247 | 248 | Creating table 249 | 250 | From query: 251 | 252 | >>> table = tables.create('table2', query) 253 | 254 | From other table 255 | 256 | >>> table2 = table.create('table2', table) 257 | 258 | Uploading file 259 | 260 | >>> db = con.databases.files 261 | >>> db.tables.create('filename', dataframe) 262 | 263 | ` Droping table 264 | 265 | >>> db.tables.drop('table2') 266 | """ 267 | 268 | def __init__(self, database, api): 269 | self.database = database 270 | self.api = api 271 | 272 | def _list_tables(self): 273 | df = self.database.query('show tables').fetch() 274 | 275 | # first column 276 | return list(df[df.columns[0]]) 277 | 278 | def list(self) -> List[Table]: 279 | """ 280 | Show list of tables in integration 281 | 282 | :return: list of Table objects 283 | """ 284 | return [Table(self.database, name) for name in self._list_tables()] 285 | 286 | def get(self, name: str) -> Table: 287 | """ 288 | Get table by name 289 | 290 | :param name: name of table 291 | :return: Table object 292 | """ 293 | 294 | return Table(self.database, name) 295 | 296 | def create(self, name: str, query: Union[pd.DataFrame, Query], replace: bool = False) -> Union[Table, Query]: 297 | """ 298 | Create new table and return it. 299 | 300 | On mindsdb server it executes command: 301 | `insert into a (select ...)` 302 | 303 | or if replace is True 304 | `create table a (select ...)` 305 | 306 | 'select ...' 
is extracted from input Query 307 | 308 | :param name: name of table 309 | :param query: Query object 310 | :param replace: if true, 311 | :return: Table object 312 | """ 313 | 314 | if isinstance(query, pd.DataFrame) and self.database.name == 'files': 315 | # now it is only possible for file uploading 316 | self.api.upload_file(name, query) 317 | 318 | return Table(self.database, name) 319 | 320 | if not isinstance(query, Query): 321 | raise NotImplementedError 322 | 323 | # # query can be in different database: wrap to NativeQuery 324 | # ast_query = CreateTable( 325 | # name=Identifier(name), 326 | # is_replace=is_replace, 327 | # from_select=Select( 328 | # targets=[Star()], 329 | # from_table=NativeQuery( 330 | # integration=Identifier(data.database), 331 | # query=data.sql 332 | # ) 333 | # ) 334 | # ) 335 | # self.query(ast_query.to_string()).fetch() 336 | 337 | # call in query database 338 | table = Identifier(parts=[self.database.name, name]) 339 | 340 | if query.database is not None: 341 | # use native query 342 | ast_query = CreateTable( 343 | name=table, 344 | is_replace=replace, 345 | from_select=query_to_native_query(query) 346 | ) 347 | sql = ast_query.to_string() 348 | else: 349 | replace_str = '' 350 | if replace: 351 | replace_str = ' or replace' 352 | 353 | sql = f'create{replace_str} table {table.to_string()} ({query.sql})' 354 | 355 | if is_saving(): 356 | return Query(self, sql) 357 | 358 | self.api.sql_query(sql) 359 | 360 | return Table(self.database, name) 361 | 362 | def drop(self, name: str): 363 | """ 364 | Delete table 365 | 366 | :param name: name of table 367 | """ 368 | table = Identifier(parts=[self.database.name, name]) 369 | 370 | ast_query = DropTables( 371 | tables=[table] 372 | ) 373 | sql = ast_query.to_string() 374 | 375 | if is_saving(): 376 | return Query(self, sql) 377 | self.api.sql_query(sql) 378 | 379 | -------------------------------------------------------------------------------- /mindsdb_sdk/utils/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindsdb/mindsdb_python_sdk/c8c51f2597f04d30e6d16b798a304153e7bd9233/mindsdb_sdk/utils/__init__.py -------------------------------------------------------------------------------- /mindsdb_sdk/utils/agents.py: -------------------------------------------------------------------------------- 1 | import re 2 | import json 3 | import logging 4 | from typing import Dict, Any, Generator, Optional, Tuple 5 | 6 | 7 | class MindsDBSQLStreamParser: 8 | """ 9 | A utility class for parsing SQL queries from MindsDB completion streams. 10 | 11 | This class provides methods to process completion streams, extract SQL queries, 12 | and accumulate full responses. 13 | 14 | Attributes: 15 | logger (logging.Logger): The logger instance for this class. 16 | """ 17 | 18 | def __init__(self, log_level: int = logging.INFO): 19 | """ 20 | Initialize the MindsDBSQLStreamParser. 21 | 22 | Args: 23 | log_level (int, optional): The logging level to use. Defaults to logging.INFO. 24 | """ 25 | self.logger = logging.getLogger(__name__) 26 | self.logger.setLevel(log_level) 27 | 28 | # Create a console handler and set its level 29 | ch = logging.StreamHandler() 30 | ch.setLevel(log_level) 31 | 32 | # Create a formatter 33 | formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') 34 | 35 | # Add the formatter to the handler 36 | ch.setFormatter(formatter) 37 | 38 | # Add the handler to the logger 39 | self.logger.addHandler(ch) 40 | 41 | def stream_and_parse_sql_query(self, completion_stream: Generator[Dict[str, Any], None, None]) -> Generator[ 42 | Dict[str, Optional[str]], None, None]: 43 | """ 44 | Stream and parse the completion stream, yielding output and SQL queries. 45 | 46 | This generator function processes each chunk of the completion stream, 47 | extracts any output and SQL queries, and yields the results. 
48 | 49 | Args: 50 | completion_stream (Generator[Dict[str, Any], None, None]): The input completion stream. 51 | 52 | Yields: 53 | Dict[str, Optional[str]]: A dictionary containing 'output' and 'sql_query' keys. 54 | - 'output': The extracted output string from the chunk, if any. 55 | - 'sql_query': The extracted SQL query string, if found in the chunk. 56 | 57 | Note: 58 | This function will only yield the first SQL query it finds in the stream. 59 | """ 60 | sql_query_found = False 61 | 62 | for chunk in completion_stream: 63 | output = "" 64 | sql_query = None 65 | 66 | # Log full chunk at DEBUG level 67 | self.logger.debug(f"Processing chunk: {json.dumps(chunk, indent=2)}") 68 | 69 | # Log important info at INFO level 70 | if isinstance(chunk, dict): 71 | if 'quick_response' in chunk: 72 | self.logger.info(f"Quick response received: {json.dumps(chunk)}") 73 | 74 | output = chunk.get('output', '') 75 | if output: 76 | self.logger.info(f"Chunk output: {output}") 77 | 78 | if 'messages' in chunk: 79 | for message in chunk['messages']: 80 | if message.get('role') == 'assistant': 81 | self.logger.info(f"Assistant message: {message.get('content', '')}") 82 | if chunk.get('type') == 'sql': 83 | sql_query = chunk['content'] 84 | self.logger.info(f"Generated SQL: {sql_query}") 85 | 86 | elif isinstance(chunk, str): 87 | output = chunk 88 | self.logger.info(f"String chunk received: {chunk}") 89 | 90 | yield { 91 | 'output':output, 92 | 'sql_query':sql_query 93 | } 94 | 95 | def process_stream(self, completion_stream: Generator[Dict[str, Any], None, None]) -> Tuple[str, Optional[str]]: 96 | """ 97 | Process the completion stream and extract the SQL query. 98 | 99 | This method iterates through the stream, accumulates the full response, 100 | logs outputs, and extracts the SQL query when found. 101 | 102 | Args: 103 | completion_stream (Generator[Dict[str, Any], None, None]): The input completion stream. 
from contextvars import ContextVar

# per-context key/value storage used to flag state such as "currently saving"
context_storage = ContextVar('create_context')


def set_context(name: str, value: str):
    """
    Set context value to variable

    :param name: variable name
    :param value: variable value
    """
    # Copy before mutating: the dict held by a ContextVar can be shared with
    # parent/sibling contexts, so mutating it in place would leak the change
    # outside of the current context.
    data = dict(context_storage.get({}))
    data[name] = value

    context_storage.set(data)


def get_context(name: str) -> str:
    """
    Get context value from variable

    :param name: variable name
    :return: variable value, or None if the variable was never set
    """

    data = context_storage.get({})
    return data.get(name)


def set_saving(name: str):
    """
    Set name of saving object to context

    :param name: name of the object
    """
    set_context('saving', name)


def is_saving() -> bool:
    """
    Returns true if an object is being saved at the moment
    """

    return get_context('saving') is not None
# Define the Mind entity
class Mind:
    """
    Mind entity

    Lightweight handle returned by :func:`create_mind`; it only carries the
    mind's name.
    """

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        # same repr convention as the other SDK entities (Project, Skill, ...)
        return f'{self.__class__.__name__}({self.name})'
72 | filters: List[str] = [ ] 73 | 74 | 75 | # Create mind entity util function 76 | def create_mind( 77 | base_url: str, 78 | api_key: str, 79 | name: str, 80 | data_source_configs: List[DataSourceConfig] = None, 81 | model: Optional[str] = None, 82 | ) -> Mind: 83 | """ 84 | Create a mind entity in LiteLLM proxy. 85 | 86 | Args: 87 | base_url (str): MindsDB base URL 88 | api_key (str): MindsDB API key 89 | name (str): Mind name 90 | data_source_configs (List[DataSourceConfig]): Data sources to make available to the mind 91 | model: Model orchestrating the AI reasoning loop 92 | 93 | Returns: 94 | Mind: Mind entity 95 | """ 96 | warnings.simplefilter('always', DeprecationWarning) # turn off filter 97 | warnings.warn( 98 | 'Minds in python SDK are deprecated. Use minds SDK instead (`pip install minds-sdk`)', 99 | category=DeprecationWarning 100 | ) 101 | warnings.simplefilter('default', DeprecationWarning) 102 | 103 | url = f"{base_url.rstrip('/')}/minds" 104 | headers = {"Authorization": f"Bearer {api_key}"} 105 | if data_source_configs is None: 106 | data_source_configs = [] 107 | payload = { 108 | "name": name, 109 | "data_source_configs": [d.model_dump() for d in data_source_configs], 110 | "model": model 111 | } 112 | try: 113 | response = requests.post(url, json=payload, headers=headers) 114 | response.raise_for_status() 115 | except requests.exceptions.HTTPError as e: 116 | try: 117 | error_message = e.response.json() 118 | except Exception: 119 | error_message = str(e) 120 | logger.error(f"Failed to create mind: {error_message}") 121 | raise e 122 | except Exception as e: 123 | logger.error(f"Failed to create mind: {e}") 124 | raise e 125 | 126 | name = response.json()['name'] 127 | 128 | return Mind(name=name) 129 | -------------------------------------------------------------------------------- /mindsdb_sdk/utils/objects_collection.py: -------------------------------------------------------------------------------- 1 | import re 2 | from typing import 
class CollectionBase:
    """
    Base class for SDK object collections.

    Exposes the items returned by ``self.list()`` both as attributes
    (``collection.item_name`` delegates to ``self.get(name)``) and in
    tab-completion via ``__dir__``. Subclasses are expected to provide
    ``list()`` and ``get()`` (and usually ``create()``/``drop()``).
    """

    def __dir__(self) -> Iterable[str]:
        # Offer the collection API itself plus the item names for autocomplete.
        internal_methods = ['create', 'drop', 'get', 'list']

        items = [item.name for item in self.list()]

        # Keep only names that are valid Python identifiers (can't start with
        # a digit). BUG FIX: use a raw string — '\w' in a plain string is an
        # invalid escape sequence (SyntaxWarning on modern Python).
        items = [i for i in items if re.match(r'^(?![0-9])\w+$', i)]
        return internal_methods + items

    def __getattr__(self, name):
        # Never intercept dunder lookups (pickle/copy/IPython probe them);
        # raising AttributeError keeps default behavior.
        if name.startswith('__'):
            raise AttributeError(name)

        return self.get(name)
DEFAULT_MAX_WAIT = 40
DEFAULT_STOP_AFTER_ATTEMPT = 3

logger = getLogger(__name__)


@retry(
    wait=wait_random_exponential(multiplier=DEFAULT_RETRY_MULTIPLIER, max=DEFAULT_MAX_WAIT),
    # BUG FIX: this previously passed DEFAULT_RETRY_MULTIPLIER (=1), which
    # stopped after a single attempt and disabled retries entirely.
    # DEFAULT_STOP_AFTER_ATTEMPT (=3) is the intended, otherwise-unused limit.
    stop=stop_after_attempt(DEFAULT_STOP_AFTER_ATTEMPT),
)
def chat_completion_request(
    client: openai.OpenAI,
    model: str,
    messages: List[dict],
    tools: List = None,
    tool_choice: ChatCompletionToolChoiceOptionParam = None
):
    """
    Send a chat completion request, retrying with random exponential backoff.

    :param client: configured OpenAI client
    :param model: model name to use for the completion
    :param messages: conversation messages ([{'role': ..., 'content': ...}, ...])
    :param tools: optional tool definitions made available to the model
    :param tool_choice: optional tool-choice constraint

    :return: the ChatCompletion response, or the caught exception object when
             the request fails.
    """
    # NOTE(review): because the exception is caught and *returned* rather than
    # re-raised, the @retry decorator never sees a failure and will not retry.
    # Callers must type-check the return value. Left as-is to preserve the
    # existing caller contract — confirm whether re-raising is intended.
    try:
        response = client.chat.completions.create(
            model=model,
            messages=messages,
            tools=tools,
            tool_choice=tool_choice,
        )
        return response
    except Exception as e:
        logger.warning("Unable to generate ChatCompletion response")
        logger.warning(f"Exception: {e}")
        return e
inspect.isclass(param.annotation): 81 | param_type = param.annotation.__name__ 82 | else: 83 | param_type = str(param.annotation) 84 | else: 85 | param_type = None 86 | 87 | function_dict["function"]["parameters"]["properties"][name] = { 88 | "type":param_type, 89 | "description":param_description 90 | } 91 | 92 | # Check if parameter is required 93 | if param.default == inspect.Parameter.empty: 94 | function_dict["function"]["parameters"]["required"].append(name) 95 | 96 | return function_dict 97 | 98 | 99 | def make_query_tool(schema: dict) -> dict: 100 | """ 101 | Make an OpenAI tool for querying a database connection in MindsDB 102 | 103 | :param schema: database schema 104 | 105 | :return: dictionary containing function metadata for openai tools 106 | """ 107 | return { 108 | "type":"function", 109 | "function":{ 110 | "name":"query_database", 111 | "description":"Use this function to answer user questions. Input should be a fully formed SQL query.", 112 | "parameters":{ 113 | "type":"object", 114 | "properties":{ 115 | "query":{ 116 | "type":"string", 117 | "description":f""" 118 | SQL query extracting info to answer the user's question. 119 | SQL should be written using this database schema: 120 | {schema} 121 | The query should be returned in plain text, not in JSON. 
122 | """, 123 | } 124 | }, 125 | "required":["query"], 126 | }, 127 | } 128 | } 129 | 130 | 131 | def make_data_tool( 132 | model: str, 133 | data_source: str, 134 | description: str, 135 | connection_args: dict 136 | ): 137 | """ 138 | tool passing mindsdb database connection details for datasource to litellm callback 139 | 140 | :param model: model name for text to sql completion 141 | :param data_source: data source name 142 | :param description: description of the data source 143 | :param connection_args: connection arguments for the data source 144 | 145 | :return: dictionary containing function metadata for openai tools 146 | """ 147 | # Convert the connection_args dictionary to a JSON object 148 | connection_args_json = json.dumps(connection_args) 149 | 150 | tool_description = f""" 151 | Queries the provided data source about user data. When calling this function, ALWAYS use the following arguments: 152 | - model: {model} 153 | - connection_args: {connection_args_json} 154 | - data_source: {data_source} 155 | - description: {description} 156 | """ 157 | 158 | return { 159 | "type":"function", 160 | "function":{ 161 | "name":"get_mindsdb_text_to_sql_completion", 162 | "description":tool_description, 163 | "parameters":{ 164 | "type":"object", 165 | "properties":{ 166 | "model":{ 167 | "type":"string", 168 | "description":"llm model name to use for text to sql completion", 169 | }, 170 | "data_source":{ 171 | "type":"string", 172 | "description":"Data source name", 173 | }, 174 | "connection_args":{ 175 | "type":"string", 176 | "description":"Connection arguments for the data source", 177 | }, 178 | "description":{ 179 | "type":"string", 180 | "description":"Description of the data source", 181 | } 182 | }, 183 | "required": ['data_source', 'connection_args', 'model', 'description'] 184 | } 185 | } 186 | } 187 | 188 | 189 | def extract_sql_query(result: str) -> str: 190 | """ 191 | Extract the SQL query from an openai result string 192 | 193 | :param result: 
def extract_sql_query(result: str) -> str:
    """
    Pull the SQL statement out of an OpenAI result string.

    The query is expected after a "SQLQuery:" marker, possibly spanning
    several lines, and is terminated by an optional "SQLResult:" marker.

    :param result: OpenAI result string
    :return: SQL query string, or None when no query was found
    """
    query_parts = []
    capturing = False

    for line in result.split('\n'):
        if line.startswith("SQLQuery:"):
            # Marker found: start (or restart) collecting from this line.
            query_parts = [line[len("SQLQuery:"):].strip()]
            capturing = True
        elif line.startswith("SQLResult:"):
            # Result marker ends the query section.
            break
        elif capturing:
            # Continuation line of a multi-line query.
            query_parts.append(line.strip())

    query = " ".join(query_parts)

    # No marker (or only an empty query) yields None, matching callers' checks.
    if query == "":
        return None

    return query
import colored 259 | role_to_color = { 260 | "system":"red", 261 | "user":"green", 262 | "assistant":"blue", 263 | "function":"magenta", 264 | } 265 | 266 | for message in messages: 267 | if message["role"] == "system": 268 | logger.info(colored(f"system: {message['content']}\n", role_to_color[message["role"]])) 269 | elif message["role"] == "user": 270 | logger.info(colored(f"user: {message['content']}\n", role_to_color[message["role"]])) 271 | elif message["role"] == "assistant" and message.get("function_call"): 272 | logger.info(colored(f"assistant: {message['function_call']}\n", role_to_color[message["role"]])) 273 | elif message["role"] == "assistant" and not message.get("function_call"): 274 | logger.info(colored(f"assistant: {message['content']}\n", role_to_color[message["role"]])) 275 | elif message["role"] == "function": 276 | logger.info(colored(f"function ({message['name']}): {message['content']}\n", role_to_color[message["role"]])) -------------------------------------------------------------------------------- /mindsdb_sdk/utils/sql.py: -------------------------------------------------------------------------------- 1 | from mindsdb_sql_parser.ast import BinaryOperation, Identifier, Constant, Select, Star, NativeQuery 2 | from mindsdb_sdk.query import Query 3 | 4 | 5 | def dict_to_binary_op(filters): 6 | where = None 7 | for name, value in filters.items(): 8 | condition = BinaryOperation('=', args=[Identifier(name), Constant(value)]) 9 | 10 | where = add_condition(where, condition) 11 | 12 | return where 13 | 14 | 15 | def add_condition(where, condition): 16 | if where is None: 17 | return condition 18 | else: 19 | return BinaryOperation( 20 | 'and', 21 | args=[where, condition] 22 | ) 23 | 24 | 25 | def query_to_native_query(query: Query): 26 | return Select( 27 | targets=[Star()], 28 | from_table= NativeQuery( 29 | integration=Identifier(query.database), 30 | query=query.sql 31 | ) 32 | ) 
def get_dataframe_schema(df: pd.DataFrame):
    """
    Get the schema of a DataFrame.

    :param df: DataFrame

    :return: list of dictionaries containing column names and types

    :raises ValueError: if pandas fails to convert the frame's dtypes
    """
    try:
        # Use pandas' best-guess nullable dtypes so object columns get
        # meaningful type names (e.g. 'string', 'Int64') in the schema.
        df = df.convert_dtypes()
    except Exception as e:
        # BUG FIX: the original `raise f"Error converting dtypes: {e}"`
        # raised a plain string, which is itself a TypeError in Python 3.
        # Raise a proper exception and preserve the cause.
        raise ValueError(f"Error converting dtypes: {e}") from e

    dtypes = df.dtypes

    # Convert the dtypes Series into a list of dictionaries.
    schema = [{"name": column, "type": dtype.name} for column, dtype in dtypes.items()]

    return schema
import DropView 5 | from mindsdb_sql_parser.ast import Identifier 6 | 7 | from mindsdb_sdk.utils.objects_collection import CollectionBase 8 | 9 | from .query import Query 10 | from .tables import Table 11 | 12 | 13 | class View(Table): 14 | # The same as table 15 | pass 16 | 17 | 18 | # TODO getting view sql from api not implemented yet 19 | # class View(Table): 20 | # def __init__(self, api, data, project): 21 | # super().__init__(api, data['name'], project) 22 | # self.view_sql = data['sql'] 23 | # 24 | # def __repr__(self): 25 | # # 26 | # sql = self.view_sql.replace('\n', ' ') 27 | # if len(sql) > 40: 28 | # sql = sql[:37] + '...' 29 | # 30 | # return f'{self.__class__.__name__}({self.name}{self._filters_repr()}, sql={sql})' 31 | 32 | class Views(CollectionBase): 33 | """ 34 | **Views** 35 | 36 | Get: 37 | 38 | >>> views = views.list() 39 | >>> view = views[0] 40 | 41 | By name: 42 | 43 | >>> view = views.get('view1') 44 | 45 | Create: 46 | 47 | >>> view = views.create( 48 | ... 'view1', 49 | ... database='example_db', # optional, can also be database object 50 | ... query='select * from table1' 51 | ...) 52 | 53 | Create using query object: 54 | 55 | >>> view = views.create( 56 | ... 'view1', 57 | ... query=database.query('select * from table1') 58 | ...) 
59 | 60 | Getting data: 61 | 62 | >>> view = view.filter(a=1, b=2) 63 | >>> view = view.limit(100) 64 | >>> df = view.fetch() 65 | 66 | Drop view: 67 | 68 | >>> views.drop('view1') 69 | 70 | """ 71 | 72 | def __init__(self, project, api): 73 | self.project = project 74 | self.api = api 75 | 76 | 77 | # The same as table 78 | def _list_views(self): 79 | df = self.api.objects_tree(self.project.name) 80 | df = df[df.type == 'view'] 81 | 82 | return list(df['name']) 83 | 84 | def list(self) -> List[View]: 85 | """ 86 | Show list of views in project 87 | 88 | :return: list of View objects 89 | """ 90 | return [View(self.project, name) for name in self._list_views()] 91 | 92 | def create(self, name: str, sql: Union[str, Query], database: str = None) -> View: 93 | """ 94 | Create new view in project and return it 95 | 96 | :param name: name of the view 97 | :param sql: sql query as string or query object 98 | :param database: datasource of the view (where input sql will be executed) 99 | :return: View object 100 | """ 101 | if isinstance(sql, Query): 102 | database = sql.database 103 | sql = sql.sql 104 | elif not isinstance(sql, str): 105 | raise ValueError(sql) 106 | 107 | if database is not None: 108 | database = Identifier(database) 109 | ast_query = CreateView( 110 | name=Identifier(name), 111 | query_str=sql, 112 | from_table=database 113 | ) 114 | 115 | self.project.query(ast_query.to_string()).fetch() 116 | return View(self.project, name) 117 | 118 | def drop(self, name: str): 119 | """ 120 | Drop view from project 121 | 122 | :param name: name of the view 123 | """ 124 | 125 | ast_query = DropView(names=[Identifier(name)]) 126 | 127 | self.project.query(ast_query.to_string()).fetch() 128 | 129 | def get(self, name: str) -> View: 130 | """ 131 | Get view by name from project 132 | 133 | :param name: name of the view 134 | :return: View object 135 | """ 136 | 137 | if name not in self._list_views(): 138 | raise AttributeError("View doesn't exist") 139 | return 
View(self.project, name) 140 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | pandas >= 1.3.5 3 | mindsdb-sql-parser ~= 0.0.0 4 | docstring-parser >= 0.7.3 5 | tenacity >= 8.0.1 6 | openai >= 1.74.1 7 | sseclient-py >= 1.8.0 8 | validators == 0.20.0 9 | -------------------------------------------------------------------------------- /requirements_test.txt: -------------------------------------------------------------------------------- 1 | pytest 2 | pytest-cov 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | about = {} 4 | with open("mindsdb_sdk/__about__.py") as fp: 5 | exec(fp.read(), about) 6 | 7 | with open("README.md", "r") as fh: 8 | long_description = fh.read() 9 | 10 | with open('requirements.txt') as req_file: 11 | requirements = req_file.read().splitlines() 12 | 13 | setup( 14 | name=about['__title__'], 15 | version=about['__version__'], 16 | url=about['__github__'], 17 | download_url=about['__pypi__'], 18 | license=about['__license__'], 19 | author=about['__author__'], 20 | author_email=about['__email__'], 21 | description=about['__description__'], 22 | long_description=long_description, 23 | long_description_content_type="text/markdown", 24 | packages=find_packages(exclude=('tests*',)), 25 | install_requires=requirements, 26 | extras_require={ 27 | 'dev': [ 28 | 'pytest', 29 | ] 30 | }, 31 | classifiers=[ 32 | "Programming Language :: Python :: 3", 33 | "License :: OSI Approved :: MIT License", 34 | "Operating System :: OS Independent", 35 | ], 36 | python_requires=">=3.6", 37 | ) 38 | -------------------------------------------------------------------------------- /tests/__init__.py: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/mindsdb/mindsdb_python_sdk/c8c51f2597f04d30e6d16b798a304153e7bd9233/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_agent_stream_process.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import logging 3 | 4 | from mindsdb_sdk.utils.agents import MindsDBSQLStreamParser 5 | 6 | @pytest.fixture 7 | def parser(): 8 | return MindsDBSQLStreamParser(log_level=logging.INFO) 9 | 10 | def test_initialization(parser): 11 | assert isinstance(parser, MindsDBSQLStreamParser) 12 | assert parser.logger.level == logging.INFO 13 | 14 | def test_stream_and_parse_sql_query_with_dict(parser): 15 | mock_stream = [ 16 | {'output': 'Test output', 'type': 'text'}, 17 | {'type': 'sql', 'content': 'SELECT * FROM table'}, 18 | {'output': 'More output'} 19 | ] 20 | 21 | generator = parser.stream_and_parse_sql_query(iter(mock_stream)) 22 | results = list(generator) 23 | 24 | assert len(results) == 3 25 | assert results[0] == {'output': 'Test output', 'sql_query': None} 26 | assert results[1] == {'output': '', 'sql_query': 'SELECT * FROM table'} 27 | assert results[2] == {'output': 'More output', 'sql_query': None} 28 | 29 | def test_stream_and_parse_sql_query_with_string(parser): 30 | mock_stream = ['String chunk 1', 'String chunk 2'] 31 | 32 | generator = parser.stream_and_parse_sql_query(iter(mock_stream)) 33 | results = list(generator) 34 | 35 | assert len(results) == 2 36 | assert results[0] == {'output': 'String chunk 1', 'sql_query': None} 37 | assert results[1] == {'output': 'String chunk 2', 'sql_query': None} 38 | 39 | 40 | def test_process_stream(parser, caplog): 41 | mock_stream = [ 42 | {'output':'First output'}, 43 | {'type':'sql', 'content':'SELECT * FROM users'}, 44 | {'output':'Second output'} 45 | ] 46 | 47 | with caplog.at_level(logging.INFO): 
48 | full_response, sql_query = parser.process_stream(iter(mock_stream)) 49 | 50 | assert full_response == 'First outputSecond output' 51 | assert sql_query == 'SELECT * FROM users' 52 | 53 | # Check for specific log messages 54 | assert 'Starting to process completion stream...' in caplog.text 55 | assert 'Output: First output' in caplog.text 56 | assert 'Extracted SQL Query: SELECT * FROM users' in caplog.text 57 | assert 'Output: Second output' in caplog.text 58 | assert f'Full Response: {full_response}' in caplog.text 59 | assert f'Final SQL Query: {sql_query}' in caplog.text 60 | 61 | def test_process_stream_no_sql(parser): 62 | mock_stream = [ 63 | {'output': 'First output'}, 64 | {'output': 'Second output'} 65 | ] 66 | 67 | full_response, sql_query = parser.process_stream(iter(mock_stream)) 68 | 69 | assert full_response == 'First outputSecond output' 70 | assert sql_query is None 71 | -------------------------------------------------------------------------------- /tests/test_openai.py: -------------------------------------------------------------------------------- 1 | import json 2 | from unittest.mock import patch, MagicMock 3 | from mindsdb_sdk.utils import openai 4 | 5 | 6 | def test_chat_completion_request_success(): 7 | mock_client = MagicMock() 8 | mock_client.chat.completions.create.return_value = "Test Response" 9 | response = openai.chat_completion_request(mock_client, "text-davinci-002", [{"role": "system", "content": "You are a helpful assistant."}]) 10 | assert response == "Test Response" 11 | 12 | 13 | def test_make_openai_tool(): 14 | def test_func(a: int, b: str) -> str: 15 | """This is a test function""" 16 | return b * a 17 | tool = openai.make_openai_tool(test_func) 18 | assert tool["function"]["name"] == "test_func" 19 | assert tool["function"]["description"] == "This is a test function" 20 | assert tool["function"]["parameters"]["properties"]["a"]["type"] == "int" 21 | assert tool["function"]["parameters"]["properties"]["b"]["type"] == 
"str" 22 | 23 | 24 | def test_extract_sql_query(): 25 | result = "SQLQuery: SELECT * FROM test_table\nSQLResult: [{'column1': 'value1', 'column2': 'value2'}]" 26 | query = openai.extract_sql_query(result) 27 | assert query == "SELECT * FROM test_table" 28 | 29 | 30 | def test_extract_sql_query_no_query(): 31 | result = "SQLResult: [{'column1': 'value1', 'column2': 'value2'}]" 32 | query = openai.extract_sql_query(result) 33 | assert query is None 34 | 35 | 36 | @patch("mindsdb_sdk.utils.openai.query_database") 37 | def test_execute_function_call_query_database(mock_query_database): 38 | mock_query_database.return_value = "Test Result" 39 | mock_message = MagicMock() 40 | mock_message.tool_calls[0].function.name = "query_database" 41 | mock_message.tool_calls[0].function.arguments = json.dumps({"query": "SELECT * FROM test_table"}) 42 | result = openai.execute_function_call(mock_message, MagicMock()) 43 | assert result == "Test Result" 44 | 45 | 46 | def test_execute_function_call_no_function(): 47 | mock_message = MagicMock() 48 | mock_message.tool_calls[0].function.name = "non_existent_function" 49 | result = openai.execute_function_call(mock_message, MagicMock()) 50 | assert result == "Error: function non_existent_function does not exist" 51 | --------------------------------------------------------------------------------