├── tests ├── __init__.py ├── e2e │ ├── __init__.py │ ├── ryoma_ai │ │ ├── __init__.py │ │ ├── test_llm.py │ │ ├── test_datasource.py │ │ └── test_agent.py │ └── download_gpt4all_model.py └── unit_tests │ ├── __init__.py │ ├── test_cli.py │ ├── datasource │ └── test_duckdb.py │ ├── test_prompt_template.py │ ├── test_catalog.py │ ├── test_datasource.py │ ├── test_agent.py │ ├── test_tool.py │ ├── test_cli_fixed.py │ └── test_lazy_imports.py ├── packages ├── ryoma_lab │ ├── README.md │ ├── ryoma_lab │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── embedding.py │ │ │ ├── datasource.py │ │ │ ├── agent.py │ │ │ ├── tool.py │ │ │ ├── kernel.py │ │ │ ├── vector_store.py │ │ │ ├── prompt.py │ │ │ ├── cell.py │ │ │ └── data_catalog.py │ │ ├── components │ │ │ ├── __init__.py │ │ │ ├── workspace │ │ │ │ ├── __init__.py │ │ │ │ └── cell.py │ │ │ ├── loading_icon.py │ │ │ ├── code_editor.py │ │ │ ├── table.py │ │ │ ├── upload.py │ │ │ ├── reactflow.py │ │ │ ├── embedding.py │ │ │ ├── modal.py │ │ │ ├── model_selector.py │ │ │ ├── react_rnd.py │ │ │ ├── tool.py │ │ │ └── react_resizable_panels.py │ │ ├── services │ │ │ ├── kernel │ │ │ │ ├── __init__.py │ │ │ │ ├── pythonkernel.py │ │ │ │ ├── service.py │ │ │ │ ├── base.py │ │ │ │ └── sqlkernel.py │ │ │ ├── embedding.py │ │ │ └── user.py │ │ ├── __init__.py │ │ ├── templates │ │ │ └── __init__.py │ │ ├── pages │ │ │ ├── __init__.py │ │ │ ├── document.py │ │ │ └── settings.py │ │ ├── ryoma_lab.py │ │ ├── states │ │ │ ├── utils.py │ │ │ ├── ai.py │ │ │ ├── tool.py │ │ │ └── base.py │ │ ├── cli.py │ │ └── styles.py │ ├── setup.py │ └── pyproject.toml └── ryoma_ai │ ├── ryoma_ai │ ├── api │ │ └── __init__.py │ ├── llm │ │ └── __init__.py │ ├── embedding │ │ ├── __init__.py │ │ ├── config.py │ │ ├── factory.py │ │ └── client.py │ ├── datasource │ │ ├── __init__.py │ │ ├── sqlite.py │ │ ├── nosql.py │ │ ├── bigquery.py │ │ ├── base.py │ │ ├── file.py │ │ ├── duckdb.py │ │ └── mysql.py │ ├── vector_store │ │ └── __init__.py │ ├── cli │ 
│ ├── main.py │ │ └── __init__.py │ ├── __init__.py │ ├── tool │ │ ├── __init__.py │ │ ├── pyarrow_tool.py │ │ ├── pandas_tool.py │ │ ├── spark_tool.py │ │ └── python_tool.py │ ├── catalog │ │ ├── exceptions.py │ │ └── __init__.py │ ├── store │ │ ├── __init__.py │ │ ├── config.py │ │ └── exceptions.py │ ├── prompt │ │ ├── base.py │ │ └── __init__.py │ ├── agent │ │ ├── __init__.py │ │ ├── python_agent.py │ │ ├── arrow_agent.py │ │ ├── embedding.py │ │ ├── internals │ │ │ ├── sql_log_agent.py │ │ │ └── metadata_agent.py │ │ ├── factory.py │ │ ├── pandas_agent.py │ │ ├── spark_agent.py │ │ └── kernel_node.py │ ├── models │ │ ├── agent.py │ │ ├── catalog.py │ │ └── datasource.py │ └── states.py │ ├── setup.py │ ├── tests │ └── llm │ │ └── __init__.py │ ├── .editorconfig │ ├── README.md │ └── pyproject.toml ├── research ├── .gitignore ├── azure_openai.py └── nn │ └── transformer_classifier.py ├── alembic ├── README ├── script.py.mako ├── versions │ └── 34dd3ed73def_.py └── env.py ├── docs ├── source │ ├── ryoma-lab │ │ ├── data-source.md │ │ ├── index.md │ │ ├── chat.md │ │ └── ryomalab.md │ ├── reference │ │ ├── data-sources │ │ │ ├── bigquery.md │ │ │ ├── file.md │ │ │ ├── snowflake.md │ │ │ ├── postgresql.md │ │ │ └── index.md │ │ ├── tool │ │ │ ├── IPython.md │ │ │ ├── pandas.md │ │ │ ├── sql.md │ │ │ ├── pyspark.md │ │ │ ├── pyarrow.md │ │ │ └── index.md │ │ ├── agent │ │ │ ├── python.md │ │ │ ├── spark.md │ │ │ ├── pyarrow.md │ │ │ └── pandas.md │ │ ├── index.md │ │ └── api │ │ │ └── index.md │ ├── _static │ │ ├── ryoma.png │ │ └── css │ │ │ └── custom.css │ ├── architecture │ │ ├── img.png │ │ ├── img_1.png │ │ ├── Architecture_v1.png │ │ ├── multi-agent-routing-clean.png │ │ ├── enhanced-sql-agent-workflow.png │ │ ├── multi-agent-routing-system.png │ │ ├── reforce-sql-agent-workflow.png │ │ ├── architecture.md │ │ ├── enhanced-sql-agent-workflow.mmd │ │ ├── index.md │ │ ├── reforce-sql-agent-workflow.mmd │ │ └── multi-agent-routing-clean.mmd │ ├── tech-specs │ 
│ ├── Architecture_v1.png │ │ ├── index.md │ │ └── tech_spec_v1.md │ ├── roadmap │ │ ├── index.md │ │ └── roadmap.md │ ├── contribution │ │ ├── index.md │ │ └── contribution.md │ ├── installation │ │ └── index.md │ ├── requirements.txt │ ├── getting-started │ │ └── index.md │ └── conf.py ├── Makefile └── make.bat ├── .gitbook.yaml ├── assets ├── ui.png ├── favicon.ico ├── paneleft.svg ├── images │ └── coverage.svg ├── chakra_color_mode_provider.js ├── github.svg ├── aita_black.svg └── aita_white.svg ├── package.json ├── setup.cfg ├── .claude └── settings.local.json ├── examples ├── file_example.py ├── example_arrow.py └── e2e_example.py ├── .github ├── ISSUE_TEMPLATE │ ├── config.yml │ ├── question.md │ ├── feature_request.md │ └── bug_report.md ├── workflows │ ├── release-drafter.yml │ ├── build.yml │ ├── gitpages.yml │ └── main_ryoma-demo.yml ├── .stale.yml ├── release-drafter.yml ├── dependabot.yml └── PULL_REQUEST_TEMPLATE.md ├── CHANGELOG.md ├── config.yaml ├── Dockerfile ├── .dockerignore ├── rxconfig.py ├── .pre-commit-config.yaml ├── SECURITY.md ├── scripts └── langchain_test.py ├── example_config.json ├── TODOs.md └── pyproject.toml /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/e2e/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/ryoma_lab/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit_tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /research/.gitignore: 
-------------------------------------------------------------------------------- 1 | papers 2 | -------------------------------------------------------------------------------- /tests/e2e/ryoma_ai/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/llm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/embedding/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /alembic/README: -------------------------------------------------------------------------------- 1 | Generic single-database configuration. 
-------------------------------------------------------------------------------- /docs/source/ryoma-lab/data-source.md: -------------------------------------------------------------------------------- 1 | # Data Source -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/datasource/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/vector_store/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/components/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/source/reference/data-sources/bigquery.md: -------------------------------------------------------------------------------- 1 | # Bigquery -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/services/kernel/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/source/reference/data-sources/file.md: -------------------------------------------------------------------------------- 1 | # File Data Source -------------------------------------------------------------------------------- /packages/ryoma_ai/setup.py: -------------------------------------------------------------------------------- 1 | __import__("setuptools").setup() 2 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/components/workspace/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/ryoma_lab/setup.py: -------------------------------------------------------------------------------- 1 | __import__("setuptools").setup() 2 | -------------------------------------------------------------------------------- /docs/source/reference/data-sources/snowflake.md: -------------------------------------------------------------------------------- 1 | # Snowflake Data Source -------------------------------------------------------------------------------- /.gitbook.yaml: -------------------------------------------------------------------------------- 1 | root: ./docs/ 2 | 3 | structure: 4 | readme: README.md 5 | -------------------------------------------------------------------------------- /docs/source/reference/data-sources/postgresql.md: -------------------------------------------------------------------------------- 1 | # Postgres Data Source 2 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/__init__.py: -------------------------------------------------------------------------------- 1 | """Base template for Reflex.""" 2 | -------------------------------------------------------------------------------- /assets/ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-ryoma/ryoma/HEAD/assets/ui.png -------------------------------------------------------------------------------- /assets/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-ryoma/ryoma/HEAD/assets/favicon.ico -------------------------------------------------------------------------------- /packages/ryoma_ai/tests/llm/__init__.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Tests for LLM provider functionality. 3 | """ 4 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "@mermaid-js/mermaid-cli": "^11.9.0" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /docs/source/_static/ryoma.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-ryoma/ryoma/HEAD/docs/source/_static/ryoma.png -------------------------------------------------------------------------------- /docs/source/architecture/img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-ryoma/ryoma/HEAD/docs/source/architecture/img.png -------------------------------------------------------------------------------- /docs/source/architecture/img_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-ryoma/ryoma/HEAD/docs/source/architecture/img_1.png -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [darglint] 2 | # https://github.com/terrencepreilly/darglint 3 | strictness = long 4 | docstring_style = google 5 | -------------------------------------------------------------------------------- /.claude/settings.local.json: -------------------------------------------------------------------------------- 1 | { 2 | "permissions": { 3 | "allow": [ 4 | "Bash(python:*)" 5 | ], 6 | "deny": [] 7 | } 8 | } -------------------------------------------------------------------------------- /docs/source/architecture/Architecture_v1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-ryoma/ryoma/HEAD/docs/source/architecture/Architecture_v1.png -------------------------------------------------------------------------------- /docs/source/tech-specs/Architecture_v1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-ryoma/ryoma/HEAD/docs/source/tech-specs/Architecture_v1.png -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/templates/__init__.py: -------------------------------------------------------------------------------- 1 | from .template import ThemeState, template 2 | 3 | __all__ = ["ThemeState", "template"] 4 | -------------------------------------------------------------------------------- /docs/source/architecture/multi-agent-routing-clean.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-ryoma/ryoma/HEAD/docs/source/architecture/multi-agent-routing-clean.png -------------------------------------------------------------------------------- /docs/source/architecture/enhanced-sql-agent-workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-ryoma/ryoma/HEAD/docs/source/architecture/enhanced-sql-agent-workflow.png -------------------------------------------------------------------------------- /docs/source/architecture/multi-agent-routing-system.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-ryoma/ryoma/HEAD/docs/source/architecture/multi-agent-routing-system.png -------------------------------------------------------------------------------- /docs/source/architecture/reforce-sql-agent-workflow.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-ryoma/ryoma/HEAD/docs/source/architecture/reforce-sql-agent-workflow.png -------------------------------------------------------------------------------- /docs/source/reference/tool/IPython.md: -------------------------------------------------------------------------------- 1 | # PythonTool 2 | 3 | PythonTool is a tool that allows you to run python script in IPython Kernel ([IPython](https://ipython.org/)). -------------------------------------------------------------------------------- /docs/source/roadmap/index.md: -------------------------------------------------------------------------------- 1 | (ryoma-roadmap)= 2 | 3 | # Roadmap 4 | 5 | Ryoma roadmap. 6 | 7 | ```{toctree} 8 | :maxdepth: 2 9 | 10 | roadmap 11 | ``` 12 | -------------------------------------------------------------------------------- /examples/file_example.py: -------------------------------------------------------------------------------- 1 | from ryoma_ai.datasource.file import FileDataSource 2 | 3 | f = FileDataSource("./creditcard.csv") 4 | 5 | ds = f.to_arrow(format="csv") 6 | 7 | ds.to_table() 8 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | # Configuration: https://help.github.com/en/github/building-a-strong-community/configuring-issue-templates-for-your-repository 2 | 3 | blank_issues_enabled: false 4 | -------------------------------------------------------------------------------- /docs/source/contribution/index.md: -------------------------------------------------------------------------------- 1 | (ryoma-contribution)= 2 | 3 | # Contribution 4 | 5 | Ryoma contributed documentation. 
6 | 7 | ```{toctree} 8 | :maxdepth: 2 9 | 10 | contribution 11 | ``` 12 | -------------------------------------------------------------------------------- /docs/source/installation/index.md: -------------------------------------------------------------------------------- 1 | (ryoma-installation)= 2 | 3 | # Installation 4 | 5 | Ryoma installation instructions. 6 | 7 | ```{toctree} 8 | :maxdepth: 2 9 | 10 | installation 11 | ``` 12 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## [project-title] Changelog 2 | 3 | 4 | # x.y.z (yyyy-mm-dd) 5 | 6 | *Features* 7 | * ... 8 | 9 | *Bug Fixes* 10 | * ... 11 | 12 | *Breaking Changes* 13 | * ... 14 | -------------------------------------------------------------------------------- /docs/source/reference/tool/pandas.md: -------------------------------------------------------------------------------- 1 | # PandasTool 2 | Pandas tools are used by PandasAgent to interact with the data leveraging Pandas API. 3 | 4 | ## Source 5 | 6 | * [Pandas Tool](../../../ryoma_ai/tool/pandas.py) -------------------------------------------------------------------------------- /docs/source/reference/tool/sql.md: -------------------------------------------------------------------------------- 1 | # Sql Tools 2 | Sql Tools are used to interact with sql databases, and used by `SqlAgent` to query sql queries. 
3 | 4 | ## Source 5 | * [sql tool](../../../ryoma_ai/tool/sql_tool.py) -------------------------------------------------------------------------------- /docs/source/requirements.txt: -------------------------------------------------------------------------------- 1 | furo 2 | markupsafe 3 | matplotlib 4 | myst-parser[linkify] 5 | pillow 6 | pydot 7 | sphinx-copybutton 8 | sphinx-exec-code 9 | sphinx-gallery 10 | sphinx-tabs 11 | sphinx 12 | -------------------------------------------------------------------------------- /docs/source/ryoma-lab/index.md: -------------------------------------------------------------------------------- 1 | (ryoma-aita-lab)= 2 | 3 | # Aita-lab 4 | 5 | Ryoma aita-lab documentation. 6 | 7 | ```{toctree} 8 | :maxdepth: 2 9 | 10 | chat 11 | data-source 12 | ryomalab 13 | ``` 14 | -------------------------------------------------------------------------------- /docs/source/reference/tool/pyspark.md: -------------------------------------------------------------------------------- 1 | # pyspark tool 2 | Pyspark Tools are used by PysparkAgent to interact with the data leveraging Pyspark API. 3 | 4 | ## Source 5 | * [pyspark tool](../../../ryoma_ai/tool/pyspark.py) 6 | -------------------------------------------------------------------------------- /docs/source/reference/tool/pyarrow.md: -------------------------------------------------------------------------------- 1 | # PyArrow Tool 2 | ## Overview 3 | PyArrow Tools are used by PyArrowAgent to interact with the data leveraging PyArrow API. 4 | 5 | ## Source 6 | * [PyArrow Tool](../../../ryoma_ai/tool/pyarrow.py) -------------------------------------------------------------------------------- /docs/source/tech-specs/index.md: -------------------------------------------------------------------------------- 1 | (ryoma-tech-specs)= 2 | 3 | # Tech specs 4 | 5 | This document describes the technical specifications of the project. 
from typing import Any, Optional

import reflex as rx


class Embedding(rx.Base):
    """Reflex state model describing a selected embedding model."""

    # Identifier of the embedding model — exact format is defined by the
    # callers that construct this model; confirm before relying on it.
    model: str
    # Optional keyword parameters associated with the model selection;
    # values may be None.
    model_parameters: Optional[dict[str, Optional[Any]]] = None
"""AI Powered Data Platform"""

from importlib import metadata as importlib_metadata


def get_version() -> str:
    """Return the installed distribution version of this package.

    Falls back to the string ``"unknown"`` when the package metadata is
    not available (e.g. when running from a source checkout that was
    never installed).
    """
    try:
        found = importlib_metadata.version(__name__)
    except importlib_metadata.PackageNotFoundError:  # pragma: no cover
        found = "unknown"
    return found


# Resolved once at import time so callers can read ``ryoma_ai.version``.
version: str = get_version()
This page lists the data sources that Ryoma supports. Ryoma can connect to these data sources, load data in the background, and answer questions in natural language about the data.
from typing import Optional

import reflex as rx
from ryoma_ai.models.agent import AgentType
from sqlmodel import Field


class Agent(rx.Model, table=True):
    """Database table model for a configured agent."""

    # Primary key; None until assigned (default=None before insert).
    id: Optional[str] = Field(default=None, primary_key=True)
    # Human-readable agent name.
    name: str
    # Optional free-text description of the agent.
    description: Optional[str]
    # Agent category; defaults to the built-in ``ryoma`` type.
    type: Optional[AgentType] = Field(default=AgentType.ryoma)
    # NOTE(review): presumably a serialized workflow definition —
    # confirm the format with callers before depending on it.
    workflow: Optional[str]
# Create a basic Python image that installs Poetry and uses Poetry to install dependencies
import logging
from typing import Optional

from langchain_core.embeddings import Embeddings
from ryoma_ai.llm.provider import load_model_provider

# Module-level logger so records are attributed to this module rather
# than the root logger.
logger = logging.getLogger(__name__)


def get_embedding_client(
    selected_model: str, model_parameters: Optional[dict[str, str]] = None
) -> Embeddings:
    """Create a LangChain embeddings client for the given model.

    Args:
        selected_model: Model identifier understood by
            ``load_model_provider``.
        model_parameters: Optional extra parameters forwarded to the
            provider; ``None`` means provider defaults.

    Returns:
        An ``Embeddings`` client produced by the model provider.
    """
    # Lazy %-style args avoid formatting when INFO is disabled.
    logger.info("Creating embedding client for %s", selected_model)
    return load_model_provider(
        selected_model,
        "embedding",
        model_parameters=model_parameters,
    )
"""Public tool exports for ``ryoma_ai.tool``.

Re-exports the concrete tool classes from their submodules so callers
can import them directly from this package.
"""

from ryoma_ai.tool.pandas_tool import PandasTool
from ryoma_ai.tool.pyarrow_tool import ArrowTool
from ryoma_ai.tool.python_tool import PythonTool
from ryoma_ai.tool.spark_tool import ConvertPandasToSparkTool, SparkTool
from ryoma_ai.tool.sql_tool import QueryProfileTool, SqlQueryTool

# Names exposed via ``from ryoma_ai.tool import *``.
__all__ = [
    "PandasTool",
    "ArrowTool",
    "PythonTool",
    "ConvertPandasToSparkTool",
    "SparkTool",
    "QueryProfileTool",
    "SqlQueryTool",
]
14 | """ 15 | with open("README.md", encoding="utf-8") as readme: 16 | content = readme.read() 17 | return rx.markdown(content, component_map=styles.markdown_style) 18 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | # Git 2 | .git 3 | .gitignore 4 | .github 5 | 6 | # Docker 7 | .dockerignore 8 | 9 | # IDE 10 | .idea 11 | .vscode 12 | 13 | # Byte-compiled / optimized / DLL files 14 | __pycache__/ 15 | **/__pycache__/ 16 | *.pyc 17 | *.pyo 18 | *.pyd 19 | .Python 20 | *.py[cod] 21 | *$py.class 22 | .pytest_cache/ 23 | ..mypy_cache/ 24 | 25 | # poetry 26 | .venv 27 | 28 | # C extensions 29 | *.so 30 | 31 | # Virtual environment 32 | .venv 33 | venv 34 | 35 | .DS_Store 36 | .AppleDouble 37 | .LSOverride 38 | ._* 39 | 40 | # exclude research in docker build 41 | research 42 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/models/kernel.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import reflex as rx 4 | from pydantic import Field 5 | from ryoma_lab.models.tool import Tool, ToolOutput 6 | 7 | 8 | class Kernel(rx.Model, table=True): 9 | datasource: Optional[str] = Field(None, description="Name of the datasource") 10 | type: str 11 | tool: Optional[str] = Field(None, description="Name of the tool") 12 | output: Optional[str] = Field(None, description="Output of the tool") 13 | 14 | 15 | class ToolKernel(rx.Base): 16 | tool: Tool 17 | output: ToolOutput 18 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/catalog/exceptions.py: -------------------------------------------------------------------------------- 1 | """ 2 | Exception classes for catalog operations. 
3 | """ 4 | 5 | from typing import Optional 6 | 7 | 8 | class CatalogIndexError(Exception): 9 | """Raised when catalog indexing operations fail.""" 10 | 11 | def __init__( 12 | self, operation: str, catalog_id: str, cause: Optional[Exception] = None 13 | ): 14 | message = f"Failed to {operation} catalog '{catalog_id}'" 15 | super().__init__(message) 16 | self.operation = operation 17 | self.catalog_id = catalog_id 18 | self.cause = cause 19 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/store/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Store module for managing data sources and catalogs using LangChain stores. 3 | """ 4 | 5 | from .catalog_store import CatalogIndex, CatalogStore 6 | from .data_source_store import DataSourceRegistration, DataSourceStore 7 | from .exceptions import CatalogNotFoundError, DataSourceNotFoundError, StoreException 8 | 9 | __all__ = [ 10 | "DataSourceStore", 11 | "DataSourceRegistration", 12 | "CatalogStore", 13 | "CatalogIndex", 14 | "StoreException", 15 | "DataSourceNotFoundError", 16 | "CatalogNotFoundError", 17 | ] 18 | -------------------------------------------------------------------------------- /docs/source/reference/agent/spark.md: -------------------------------------------------------------------------------- 1 | # Spark Agent 2 | 3 | Spark agent is an Ryoma agent specialize in writing spark code. 
4 | 5 | ## Example 6 | 7 | 8 | ```python 9 | from ryoma_ai.agent.pyspark import PySparkAgent 10 | from ryoma_ai.datasource.postgres import PostgresDataSource 11 | 12 | datasource = PostgresDataSource("postgresql://localhost:5432/db") 13 | spark_configs = { 14 | "master": "local", 15 | "appName": "Ryoma" 16 | } 17 | spark_agent = PySparkAgent(spark_configs, "gpt-3.5-turbo") 18 | 19 | spark_agent.stream("I want to get the top customers which making the most purchases") 20 | ``` 21 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/components/loading_icon.py: -------------------------------------------------------------------------------- 1 | import reflex as rx 2 | 3 | 4 | class LoadingIcon(rx.Component): 5 | """A custom loading icon component.""" 6 | 7 | library = "react-loading-icons" 8 | tag = "SpinningCircles" 9 | stroke: rx.Var[str] 10 | stroke_opacity: rx.Var[str] 11 | fill: rx.Var[str] 12 | fill_opacity: rx.Var[str] 13 | stroke_width: rx.Var[str] 14 | speed: rx.Var[str] 15 | height: rx.Var[str] 16 | 17 | def get_event_triggers(self) -> dict: 18 | return {"on_change": lambda status: [status]} 19 | 20 | 21 | loading_icon = LoadingIcon.create 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: ❓ Question 3 | about: Ask a question about this project 🎓 4 | title: '' 5 | labels: question 6 | assignees: 7 | --- 8 | 9 | ## Checklist 10 | 11 | 12 | 13 | - [ ] I've searched the project's [`issues`](https://github.com/ryoma/ryoma/issues?q=is%3Aissue). 14 | 15 | ## ❓ Question 16 | 17 | 18 | 19 | How can I [...]? 20 | 21 | Is it possible to [...]? 
22 | 23 | ## 📎 Additional context 24 | 25 | 26 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/prompt/base.py: -------------------------------------------------------------------------------- 1 | from langchain_core.prompts import ChatPromptTemplate 2 | 3 | BasePromptTemplate = ChatPromptTemplate.from_messages( 4 | messages=[ 5 | ( 6 | "system", 7 | """ 8 | You are an expert in the field of data science, analysis, and data engineering. 9 | """, 10 | ) 11 | ] 12 | ) 13 | 14 | BasicContextPromptTemplate = ChatPromptTemplate.from_messages( 15 | messages=[ 16 | ( 17 | "system", 18 | """ 19 | You are provided with the following context: 20 | {prompt_context} 21 | 22 | """, 23 | ) 24 | ] 25 | ) 26 | -------------------------------------------------------------------------------- /tests/unit_tests/test_cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Quick test script for the Ryoma SQL CLI in development mode. 
4 | """ 5 | 6 | import sys 7 | from pathlib import Path 8 | 9 | from ryoma_ai.cli.main import main 10 | 11 | # Add the ryoma_ai package to Python path 12 | ryoma_ai_path = Path(__file__).parent / "packages" / "ryoma_ai" 13 | sys.path.insert(0, str(ryoma_ai_path)) 14 | 15 | if __name__ == "__main__": 16 | # You can modify sys.argv to test different arguments 17 | # sys.argv = ["test_cli.py", "--setup"] # Test setup mode 18 | # sys.argv = ["test_cli.py", "--help"] # Test help 19 | 20 | main() 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 🚀 Feature request 3 | about: Suggest an idea for this project 🏖 4 | title: '' 5 | labels: enhancement 6 | assignees: 7 | --- 8 | 9 | ## 🚀 Feature Request 10 | 11 | 12 | 13 | ## 🔈 Motivation 14 | 15 | 16 | 17 | ## 🛰 Alternatives 18 | 19 | 20 | 21 | ## 📎 Additional context 22 | 23 | 24 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/catalog/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Catalog management module for Ryoma AI. 3 | 4 | This module provides unified catalog indexing and search functionality. 
5 | """ 6 | 7 | from ryoma_ai.catalog.exceptions import CatalogIndexError 8 | from ryoma_ai.catalog.indexer import ( 9 | CatalogIndexer, 10 | HierarchicalCatalogIndexer, 11 | IndexLevel, 12 | UnifiedCatalogIndexService, 13 | VectorIndexer, 14 | ) 15 | 16 | __all__ = [ 17 | "CatalogIndexer", 18 | "CatalogIndexError", 19 | "HierarchicalCatalogIndexer", 20 | "IndexLevel", 21 | "UnifiedCatalogIndexService", 22 | "VectorIndexer", 23 | ] 24 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/agent/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Ryoma AI Agent Module 3 | 4 | This module provides various types of AI agents including chat agents, 5 | workflow agents, and SQL agents with factory pattern support. 6 | """ 7 | 8 | from .base import BaseAgent 9 | from .chat_agent import ChatAgent 10 | from .sql import BasicSqlAgent, EnhancedSqlAgentImpl, ReFoRCESqlAgentImpl, SqlAgent 11 | from .workflow import ToolMode, WorkflowAgent 12 | 13 | __all__ = [ 14 | "BaseAgent", 15 | "ChatAgent", 16 | "WorkflowAgent", 17 | "ToolMode", 18 | "SqlAgent", 19 | "BasicSqlAgent", 20 | "EnhancedSqlAgentImpl", 21 | "ReFoRCESqlAgentImpl", 22 | ] 23 | -------------------------------------------------------------------------------- /docs/source/getting-started/index.md: -------------------------------------------------------------------------------- 1 | (ryoma-getting-started)= 2 | 3 | # 🚀 Getting Started 4 | 5 | Welcome to Ryoma! This guide will help you get up and running with Ryoma's AI-powered data analysis capabilities. 
6 | 7 | ## 🎯 What You'll Learn 8 | 9 | - **Quick Setup** - Install and configure Ryoma in minutes 10 | - **Basic Usage** - Connect to data sources and ask questions 11 | - **Advanced Features** - Leverage enhanced SQL agents and profiling 12 | - **Best Practices** - Production-ready configurations 13 | 14 | ```{toctree} 15 | :maxdepth: 2 16 | 17 | quickstart 18 | cli-usage 19 | configuration-reference 20 | advanced-setup 21 | troubleshooting 22 | examples 23 | ``` 24 | -------------------------------------------------------------------------------- /docs/source/reference/agent/pyarrow.md: -------------------------------------------------------------------------------- 1 | # Pyarrow Agent 2 | 3 | The Pyarrow agent is an Ryoma agent that runs on the Pyarrow library. 4 | The Pyarrow agent can be used to ask questions in natural language and interact with Pyarrow Tables. 5 | 6 | ## Example 7 | 8 | ```python 9 | from ryoma_ai.agent.pyarrow import PyArrowAgent 10 | import pyarrow as pa 11 | 12 | table = pa.table({ 13 | 'customer_id': pa.array([1, 2, 3, 4, 5]), 14 | 'purchase_amount': pa.array([100, 200, 300, 400, 500]) 15 | }) 16 | 17 | pa_agent = PyArrowAgent("gpt-3.5-turbo") 18 | .add_table(table) 19 | 20 | pa_agent.stream("I want to get the top customers which making the most purchases") 21 | ``` 22 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/cli/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Ryoma AI CLI Module 3 | 4 | Modular, object-oriented CLI implementation for the Ryoma AI multi-agent system. 
5 | """ 6 | 7 | from .agent_manager import AgentManager 8 | from .app import RyomaAI, main 9 | from .catalog_manager import CatalogManager 10 | from .command_handler import CommandHandler 11 | from .config_manager import ConfigManager 12 | from .datasource_manager import DataSourceManager 13 | from .display_manager import DisplayManager 14 | 15 | __all__ = [ 16 | "RyomaAI", 17 | "main", 18 | "ConfigManager", 19 | "DataSourceManager", 20 | "AgentManager", 21 | "DisplayManager", 22 | "CatalogManager", 23 | "CommandHandler", 24 | ] 25 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/tool/pyarrow_tool.py: -------------------------------------------------------------------------------- 1 | from typing import Type 2 | 3 | from pydantic import BaseModel, Field 4 | from ryoma_ai.tool.python_tool import PythonTool 5 | 6 | 7 | class ArrowInput(BaseModel): 8 | script: str = Field(description="PyArrow analysis script") 9 | 10 | 11 | class ArrowTool(PythonTool): 12 | """Tool for using Apache Arrow in Python.""" 13 | 14 | name: str = "pyarrow_tool" 15 | description: str = """ 16 | Apache Arrow is a cross-language development platform for in-memory data analysis. 17 | This tool allows you to run PyArrow script in Python. 18 | 19 | PyArrow Table is available in the script context. 20 | """ 21 | args_schema: Type[BaseModel] = ArrowInput 22 | -------------------------------------------------------------------------------- /.github/.stale.yml: -------------------------------------------------------------------------------- 1 | # Number of days of inactivity before an issue becomes stale 2 | daysUntilStale: 60 3 | # Number of days of inactivity before a stale issue is closed 4 | daysUntilClose: 7 5 | # Issues with these labels will never be considered stale 6 | exemptLabels: 7 | - pinned 8 | - security 9 | # Label to use when marking an issue as stale 10 | staleLabel: wontfix 11 | # Comment to post when marking an issue as stale. Set to `false` to disable 12 | markComment: > 13 | This issue has been automatically marked as stale because it has not had 14 | recent activity. It will be closed if no further activity occurs. Thank you 15 | for your contributions. 16 | # Comment to post when closing a stale issue. 
Set to `false` to disable 17 | closeComment: false 18 | -------------------------------------------------------------------------------- /alembic/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision | comma,n} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | from typing import Sequence, Union 9 | 10 | from alembic import op 11 | import sqlalchemy as sa 12 | ${imports if imports else ""} 13 | 14 | # revision identifiers, used by Alembic. 15 | revision: str = ${repr(up_revision)} 16 | down_revision: Union[str, None] = ${repr(down_revision)} 17 | branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} 18 | depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} 19 | 20 | 21 | def upgrade() -> None: 22 | ${upgrades if upgrades else "pass"} 23 | 24 | 25 | def downgrade() -> None: 26 | ${downgrades if downgrades else "pass"} 27 | -------------------------------------------------------------------------------- /tests/e2e/ryoma_ai/test_llm.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from ryoma_ai.llm.provider import load_model_provider 3 | 4 | 5 | def test_gp44all_model(): 6 | """Test GPT4All model loading - will download model on first use.""" 7 | model_id = "gpt4all:Llama-3.2-1B-Instruct-Q4_0.gguf" 8 | 9 | # Try to load the model 10 | gp44all_model = load_model_provider(model_id) 11 | 12 | if gp44all_model is None: 13 | pytest.skip( 14 | f"GPT4All model {model_id} not available. Model needs to be downloaded first." 
15 | ) 16 | 17 | # Test basic functionality 18 | response = gp44all_model.invoke("What is 2+2?") 19 | assert response is not None 20 | assert len(str(response)) > 0 21 | print(f"GPT4All response: {response}") 22 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/datasource/sqlite.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import ibis 4 | from ibis import BaseBackend 5 | from pydantic import BaseModel, Field 6 | from ryoma_ai.datasource.sql import SqlDataSource 7 | 8 | 9 | class SqliteConfig(BaseModel): 10 | connection_url: str = Field(..., description="Sqlite connection URL") 11 | 12 | 13 | class SqliteDataSource(SqlDataSource): 14 | def get_query_plan(self, query: str) -> Any: 15 | pass 16 | 17 | def crawl_catalog(self, **kwargs): 18 | pass 19 | 20 | def __init__(self, connection_url: str): 21 | super().__init__() 22 | self.connection_url = connection_url 23 | 24 | def _connect(self) -> BaseBackend: 25 | return ibis.sqlite.connect(self.connection_url) 26 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/components/code_editor.py: -------------------------------------------------------------------------------- 1 | import reflex as rx 2 | 3 | 4 | class ReactCodeMirror(rx.Component): 5 | library = "@uiw/react-codemirror" 6 | 7 | lib_dependencies: list[str] = ["@uiw/codemirror-extensions-langs"] 8 | 9 | tag = "CodeMirror" 10 | 11 | is_default = True 12 | 13 | value: rx.Var[str] 14 | 15 | height: rx.Var[str] 16 | 17 | minHeight: rx.Var[str] 18 | 19 | width: rx.Var[str] 20 | 21 | minWidth: rx.Var[str] 22 | 23 | theme: rx.Var[str] 24 | 25 | extensions: rx.Var[str] 26 | on_change: rx.EventHandler[lambda value: [value]] 27 | 28 | def add_imports(self): 29 | return {"@uiw/codemirror-extensions-langs": "loadLanguage"} 30 | 31 | 32 | codeeditor = ReactCodeMirror.create 33 | 
-------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/models/vector_store.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from typing import Optional 3 | 4 | import reflex as rx 5 | 6 | 7 | class DocumentProject(rx.Model, table=True): 8 | """ 9 | Represents a document project/workspace that uses vector storage. 10 | Multiple projects can exist, each with their own document collections. 11 | The actual vector store configuration comes from rxconfig.py. 12 | """ 13 | 14 | project_name: str # Unique identifier for the project/workspace 15 | description: Optional[str] = None # Human-readable description 16 | document_count: int = 0 # Number of documents indexed 17 | created_at: Optional[datetime] = None 18 | updated_at: Optional[datetime] = None 19 | is_active: bool = True 20 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/agent/python_agent.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Optional 2 | 3 | from ryoma_ai.agent.workflow import WorkflowAgent 4 | from ryoma_ai.tool.python_tool import PythonTool 5 | 6 | 7 | class PythonAgent(WorkflowAgent): 8 | description: str = "A Python agent that can use Python tools to run python scripts." 
9 | 10 | def __init__( 11 | self, 12 | model: str, 13 | model_parameters: Optional[Dict] = None, 14 | ): 15 | super().__init__([PythonTool()], model, model_parameters) 16 | 17 | def add_script_context(self, script_context): 18 | for tool in self.tools: 19 | if isinstance(tool, PythonTool): 20 | tool.update_script_context(script_context=script_context) 21 | return self 22 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/datasource/nosql.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | try: 4 | import boto3 5 | except ImportError: 6 | boto3 = None 7 | 8 | from ryoma_ai.datasource.base import DataSource 9 | from ryoma_ai.datasource.metadata import Catalog 10 | 11 | 12 | class DynamodbDataSource(DataSource): 13 | def __init__(self, name: str, region_name: str = None, **kwargs): 14 | super().__init__(name=name, type="nosql", **kwargs) 15 | self.region_name = region_name 16 | self.client = boto3.client("dynamodb", region_name=region_name) 17 | 18 | def get_catalog(self, table_name: str) -> List[Catalog]: 19 | response = self.client.describe_table(TableName=table_name) 20 | return response["Table"] 21 | 22 | 23 | class DynamodbConfig: 24 | pass 25 | -------------------------------------------------------------------------------- /tests/unit_tests/datasource/test_duckdb.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pytest 3 | from ryoma_ai.datasource.duckdb import DuckDBDataSource 4 | 5 | 6 | @pytest.fixture 7 | def test_pandas_df(): 8 | return pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) 9 | 10 | 11 | def test_query_with_register(test_pandas_df): 12 | data_source = DuckDBDataSource() 13 | data_source.register("pdf", test_pandas_df) 14 | query = "SELECT * FROM pdf" 15 | result = data_source.query(query) 16 | assert result.shape == test_pandas_df.shape 17 | 18 | 19 | 
def test_query(test_pandas_df): 20 | data_source = DuckDBDataSource() 21 | pdf = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) 22 | result = data_source.query("SELECT * FROM pdf", pdf=pdf) 23 | assert result.shape == test_pandas_df.shape 24 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/services/user.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | 3 | import reflex as rx 4 | from ryoma_lab.states.base import User 5 | 6 | 7 | class UserService: 8 | def __init__(self): 9 | self.session = rx.session() 10 | 11 | def __enter__(self): 12 | return self 13 | 14 | def __exit__(self, exc_type, exc_val, exc_tb): 15 | self.session.close() 16 | 17 | def create_user( 18 | self, username: str, email: str, hashed_password: str, permissions: dict 19 | ): 20 | user = User( 21 | id=str(uuid.uuid4()), 22 | username=username, 23 | email=email, 24 | hashed_password=hashed_password, 25 | permissions=permissions, 26 | ) 27 | self.session.add(user) 28 | self.session.commit() 29 | -------------------------------------------------------------------------------- /assets/paneleft.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /docs/source/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | @import "../basic.css"; 2 | 3 | html[data-theme="dark"] { 4 | --pst-color-text-base: #5c8fda; 5 | } 6 | 7 | .d2h-del { 8 | background-color: var(--pst-color-danger-highlight); 9 | color: var(--pst-color-text-base); 10 | } 11 | 12 | .d2h-ins { 13 | background-color: var(--pst-color-success); 14 | color: var(--pst-color-text-base); 15 | } 16 | 17 | .d2h-change { 18 | background-color: var(--yellow); 19 | color: var(--pst-color-text-base); 20 | } 21 | 22 | 
.sphinx-tabs-panel { 23 | background-color: var(--pst-color-background); 24 | } 25 | 26 | .sphinx-tabs-tab { 27 | background-color: var(--pst-color-background); 28 | } 29 | 30 | .sphinx-tabs { 31 | background-color: var(--pst-color-background); 32 | } 33 | 34 | .closeable{ 35 | background-color: var(--pst-color-background); 36 | } 37 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/services/kernel/pythonkernel.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | from IPython.core.interactiveshell import InteractiveShell 4 | from ryoma_lab.services.kernel.base import BaseKernel 5 | 6 | 7 | class PythonKernel(BaseKernel): 8 | def execute(self, code: str) -> Dict[str, Any]: 9 | shell = InteractiveShell.instance() 10 | result = shell.run_cell(code, store_history=False) 11 | 12 | if result.success: 13 | return self._create_success_response(result.result) 14 | elif result.error_before_exec: 15 | return self._create_error_response(result.error_before_exec) 16 | elif result.error_in_exec: 17 | return self._create_error_response(result.error_in_exec) 18 | else: 19 | return self._create_error_response(Exception("An unknown error occurred")) 20 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/tool/pandas_tool.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Type 2 | 3 | from pydantic import BaseModel, Field 4 | from ryoma_ai.datasource.sql import SqlDataSource 5 | from ryoma_ai.tool.python_tool import PythonTool 6 | 7 | 8 | class PandasInput(BaseModel): 9 | script: str = Field(description="pandas script") 10 | 11 | 12 | class PandasTool(PythonTool): 13 | """Tool for running Pandas analysis.""" 14 | 15 | name: str = "pandas_tool" 16 | description: str = """ 17 | Run a python script by using the Pandas library. 
18 | If the script is not correct, an error message will be returned. 19 | 20 | Pandas dataframes are stored in the script context. 21 | """ 22 | datasource: Optional[SqlDataSource] = Field( 23 | None, exclude=True, description="SQL data source" 24 | ) 25 | 26 | args_schema: Type[BaseModel] = PandasInput 27 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/components/table.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import reflex as rx 4 | 5 | 6 | def table(tabular_data: list[list]): 7 | return rx.table.root( 8 | rx.table.header( 9 | rx.table.row( 10 | *[rx.table.column_header_cell(cell) for cell in tabular_data[0]], 11 | ), 12 | ), 13 | rx.table.body( 14 | *[ 15 | rx.table.row( 16 | *[ 17 | ( 18 | rx.table.row_header_cell(cell) 19 | if i == 0 20 | else rx.table.cell(cell) 21 | ) 22 | for i, cell in enumerate(row) 23 | ], 24 | ) 25 | for row in tabular_data[1:] 26 | ], 27 | ), 28 | ) 29 | -------------------------------------------------------------------------------- /docs/source/contribution/contribution.md: -------------------------------------------------------------------------------- 1 | # Contribution 2 | 3 | ## How to contribute 4 | 5 | 1. Fork & Clone the repository 6 | 7 | 2. Create a new branch 8 | 9 | ## Environment setup 10 | 11 | We are using python 3.9+ for this project. 
You can install the required packages by running the following command: 12 | 13 | ```bash 14 | make install 15 | ``` 16 | 17 | ## Running the tests 18 | 19 | You can run the tests by running the following command: 20 | 21 | ```bash 22 | make test 23 | ``` 24 | 25 | ## Check and fix the code style 26 | 27 | You can check the code style by running the following command: 28 | 29 | ```bash 30 | make check-codestyle 31 | ``` 32 | 33 | to fix the code style run the following command: 34 | 35 | ```bash 36 | make codestyle 37 | ``` 38 | 39 | ## Build the project 40 | 41 | You can build the project by running the following command: 42 | 43 | ```bash 44 | make build 45 | ``` 46 | -------------------------------------------------------------------------------- /tests/unit_tests/test_prompt_template.py: -------------------------------------------------------------------------------- 1 | from ryoma_ai.prompt.prompt_template import PromptTemplateFactory 2 | 3 | 4 | def test_base_prompt_template(): 5 | ryoma_prompt = PromptTemplateFactory() 6 | ryoma_prompt.set_base_template("This is a test prompt.") 7 | template = ryoma_prompt.build_prompt() 8 | messages = template.format_messages() 9 | assert messages[0].content == "This is a test prompt." 10 | 11 | 12 | def test_prompt_template(): 13 | ryoma_prompt = PromptTemplateFactory() 14 | ryoma_prompt.add_context_template( 15 | "You are provided with the following context: {prompt_context}" 16 | ) 17 | template = ryoma_prompt.build_prompt() 18 | messages = template.format_messages(prompt_context="This is a test context.") 19 | assert ( 20 | messages[1].content 21 | == "You are provided with the following context: This is a test context." 
22 | ) 23 | -------------------------------------------------------------------------------- /.github/release-drafter.yml: -------------------------------------------------------------------------------- 1 | # Release drafter configuration https://github.com/release-drafter/release-drafter#configuration 2 | # Emojis were chosen to match the https://gitmoji.carloscuesta.me/ 3 | 4 | name-template: "v$NEXT_PATCH_VERSION" 5 | tag-template: "v$NEXT_PATCH_VERSION" 6 | 7 | categories: 8 | - title: ":rocket: Features" 9 | labels: [enhancement, feature] 10 | - title: ":wrench: Fixes & Refactoring" 11 | labels: [bug, refactoring, bugfix, fix] 12 | - title: ":package: Build System & CI/CD" 13 | labels: [build, ci, testing] 14 | - title: ":boom: Breaking Changes" 15 | labels: [breaking] 16 | - title: ":pencil: Documentation" 17 | labels: [documentation] 18 | - title: ":arrow_up: Dependencies updates" 19 | labels: [dependencies] 20 | 21 | template: | 22 | ## What’s Changed 23 | 24 | $CHANGES 25 | 26 | ## :busts_in_silhouette: List of contributors 27 | 28 | $CONTRIBUTORS 29 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/models/prompt.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import reflex as rx 4 | from sqlmodel import Field 5 | 6 | 7 | class PromptTemplate(rx.Model, table=True): 8 | prompt_repr: str = Field( 9 | ..., description="The prompt representation, e.g. SQL, TEXT, etc." 10 | ) 11 | k_shot: int = Field(..., description="The number of examples to use in the prompt.") 12 | example_format: str 13 | selector_type: str = Field( 14 | ..., 15 | description="The type of selector to use for the prompt. e.g. COSSIMILAR, RANDOM, etc.", 16 | ) 17 | prompt_template_name: str = Field( 18 | ..., description="The name of the prompt template." 
19 | ) 20 | prompt_lines: str = Field(..., description="The prompt template lines.") 21 | prompt_template_type: Optional[str] = Field( 22 | default="custom", description="The type of prompt template." 23 | ) 24 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 🐛 Bug report 3 | about: If something isn't working 🔧 4 | title: '' 5 | labels: bug 6 | assignees: 7 | --- 8 | 9 | ## 🐛 Bug Report 10 | 11 | 12 | 13 | ## 🔬 How To Reproduce 14 | 15 | Steps to reproduce the behavior: 16 | 17 | 1. ... 18 | 19 | ### Code sample 20 | 21 | 22 | 23 | ### Environment 24 | 25 | * OS: [e.g. Linux / Windows / macOS] 26 | * Python version, get it with: 27 | 28 | ```bash 29 | python --version 30 | ``` 31 | 32 | ### Screenshots 33 | 34 | 35 | 36 | ## 📈 Expected behavior 37 | 38 | 39 | 40 | ## 📎 Additional context 41 | 42 | 43 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 
from dataclasses import asdict, dataclass
from datetime import datetime
from typing import Any, Dict


@dataclass
class CatalogIndex:
    """Catalog index metadata.

    Records what was indexed for one data source catalog and when, plus
    simple counts used for reporting.
    """

    catalog_id: str
    data_source_id: str
    catalog_name: str
    indexed_at: datetime
    schema_count: int
    table_count: int
    column_count: int
    index_level: str  # one of: catalog, schema, table, column

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a plain dictionary for storage.

        ``indexed_at`` is serialized as an ISO-8601 string so the result is
        JSON-friendly.
        """
        data = asdict(self)
        data["indexed_at"] = self.indexed_at.isoformat()
        return data

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "CatalogIndex":
        """Create an instance from a dictionary loaded from storage.

        The input mapping is copied first so the caller's dict is not
        mutated (the previous implementation rewrote ``data["indexed_at"]``
        in place as a side effect).
        """
        payload = dict(data)
        payload["indexed_at"] = datetime.fromisoformat(payload["indexed_at"])
        return cls(**payload)
from ryoma_ai.datasource.metadata import Catalog

# Raw catalog metadata as returned by a data source introspection.
_AUTHOR_COLUMNS = [
    ("aid", "INT"),
    ("homepage", "TEXT"),
    ("name", "TEXT"),
    ("oid", "INT"),
]

data = {
    "catalog_name": "main",
    "schemas": [
        {
            "schema_name": "",
            "tables": [
                {
                    "table_name": "author",
                    "columns": [
                        {"name": col_name, "type": col_type, "nullable": 1}
                        for col_name, col_type in _AUTHOR_COLUMNS
                    ],
                }
            ],
        }
    ],
}


def test_catalog_model():
    """The Catalog model should accept and expose the raw metadata dict."""
    catalog = Catalog(**data)
    assert catalog.catalog_name == "main"
    assert len(catalog.schemas) == 1
    assert catalog.schemas[0].schema_name == ""
    assert len(catalog.schemas[0].tables) == 1
from typing import Dict, Optional

import pyarrow as pa
from ryoma_ai.agent.workflow import WorkflowAgent
from ryoma_ai.tool.pyarrow_tool import ArrowTool
from ryoma_ai.tool.python_tool import PythonTool


class ArrowAgent(WorkflowAgent):
    """Workflow agent that uses Arrow tools to work with pyarrow Tables."""

    description: str = (
        "An Arrow agent that can use Arrow tools to interact with Arrow Tables."
    )

    def __init__(self, model: str, model_parameters: Optional[Dict] = None):
        """Create the agent with a single ArrowTool registered.

        Args:
            model: Identifier of the underlying chat model.
            model_parameters: Optional provider-specific model parameters.
              (Annotation fixed: the default is None, so the type is Optional.)
        """
        super().__init__([ArrowTool()], model, model_parameters)

    def add_table(self, table: pa.Table) -> "ArrowAgent":
        """Register a pyarrow Table with the agent and return self for chaining.

        The table is announced to the model through a prompt (name + schema)
        and injected into every PythonTool's script context so generated
        scripts can reference it by name.
        """
        # NOTE(review): the name is derived from id(table), so it is only
        # stable while this exact table object is alive.
        table_id = f"table_{id(table)}"
        self.add_prompt(
            f"""
pyarrow table name: {table_id}
pyarrow table metadata: {table.schema}
"""
        )
        for tool in self.tools:
            if isinstance(tool, PythonTool):
                tool.update_script_context(script_context={table_id: table})
        return self
9 | """ 10 | model_parameters = config.parameters or {} 11 | 12 | # Allow config.api_key and config.endpoint to override parameters 13 | if config.api_key: 14 | model_parameters["api_key"] = config.api_key 15 | if config.endpoint: 16 | model_parameters["endpoint"] = config.endpoint 17 | 18 | # Load LangChain-compatible embedding model 19 | langchain_embedder = load_model_provider( 20 | config.model, 21 | "embedding", 22 | model_parameters=model_parameters, 23 | ) 24 | 25 | # Wrap it in Ryoma-compatible EmbeddingClient 26 | return LangchainEmbeddingClient(langchain_embedder) 27 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/tool/spark_tool.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from ryoma_ai.tool.python_tool import PythonTool 3 | 4 | 5 | class SparkTool(PythonTool): 6 | """Tool for running PySpark script.""" 7 | 8 | name: str = "pyspark_tool" 9 | description: str = """ 10 | Run a PySpark analysis script. 11 | The last line of the script should return a PySpark dataframe. 12 | If the script is not correct, an error message will be returned. 13 | """ 14 | 15 | 16 | class ConvertPandasToSparkTool(PythonTool): 17 | """Tool for converting a Pandas dataframe to a PySpark dataframe.""" 18 | 19 | name: str = "convert_pandas_to_pyspark" 20 | description: str = """ 21 | Convert a Pandas dataframe to a PySpark dataframe. 22 | If the Pandas dataframe is not correct, an error message will be returned. 
23 | """ 24 | 25 | def _run(self, dataframe: pd.DataFrame, **kwargs): 26 | """Convert the Pandas dataframe to a PySpark dataframe.""" 27 | return self.script_context["spark_session"].createDataFrame(dataframe) 28 | -------------------------------------------------------------------------------- /docs/source/tech-specs/tech_spec_v1.md: -------------------------------------------------------------------------------- 1 | 2 | # Ryoma tech spec v1 3 | 4 | This document describes the technical specifications of the project. 5 | 6 | ## Overview 7 | 8 | ### V1 Architecture 9 | 10 | ![Architecture](Architecture_v1.png) 11 | 12 | ## Components 13 | 14 | Each design component map to an interface, as well as a database table. 15 | 16 | ### A) Data Sources 17 | A data source contains the connector to the underlying db 18 | 19 | #### UI 20 | 21 | #### API 22 | 23 | 24 | #### Service 25 | 26 | 27 | ### B) Catalogs 28 | Data Catalogs contain the information (description/schema/data types) of data sources. Specifically, the catalogs include: 29 | 30 | 1. Name 31 | Name of the data source. 32 | 2. Type 33 | Database, Schema, or Table. 34 | 3. Description 35 | 4. Schema 36 | Schema of the table. 37 | 5. Data Types 38 | Each type of the column in the table. 39 | 6. Metadata 40 | Size of the data source. 41 | 42 | ### C) Vector store 43 | Vector store is used for storing the indexes of the data catalogs, as well as the user custom RAG content. 
"""Helpers for introspecting pydantic-style model classes used by the Lab UI."""

import inspect
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
    # Imported for type hints only: avoids a hard runtime dependency on the
    # deprecated langchain_core.pydantic_v1 shim.
    from langchain_core.pydantic_v1 import BaseModel


def get_model_classes(model: Any) -> list:
    """Return (name, class) pairs for every class defined on *model*."""
    return inspect.getmembers(model, inspect.isclass)


def get_model_fields(model: "BaseModel", field_name: str) -> "BaseModel":
    """Return the default value of *field_name* (pydantic v1 ``__fields__`` API)."""
    return model.__fields__[field_name].default


def get_model_fields_as_dict(model: "BaseModel") -> dict:
    """Summarize a model's fields as ``{name: {name, required, description}}``.

    Previously this mixed APIs: it imported the pydantic-v1 BaseModel but
    iterated the v2-only ``model_fields`` mapping. It now works with either
    the v2 ``model_fields`` mapping or the v1 ``__fields__`` mapping.
    """
    fields = getattr(model, "model_fields", None)
    if fields is None:
        fields = model.__fields__
    d = {}
    for field, value in fields.items():
        # A field is required when its default is ... (Ellipsis).
        is_required = value.default is ... if hasattr(value, "default") else True
        description = (
            value.field_info.description
            if hasattr(value, "field_info") and value.field_info
            else None
        )

        d[field] = {
            "name": field,
            "required": is_required,
            "description": description,
        }
    return d
# Can be: chroma, pgvector, milvus, qdrant, faiss 24 | vector_store_url=None, # If None, will use defaults for the store type 25 | vector_store_collection="ryoma_vectors", 26 | vector_store_dimension=768, 27 | ) 28 | 29 | # Setup basic configuration for logging 30 | logging.basicConfig( 31 | level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" 32 | ) 33 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_language_version: 2 | python: python3.10 3 | 4 | default_stages: [commit, push] 5 | 6 | repos: 7 | - repo: https://github.com/pre-commit/pre-commit-hooks 8 | rev: v2.5.0 9 | hooks: 10 | - id: check-yaml 11 | - id: end-of-file-fixer 12 | exclude: LICENSE 13 | 14 | - repo: local 15 | hooks: 16 | - id: pyupgrade 17 | name: pyupgrade 18 | entry: uv run pyupgrade --py38-plus 19 | types: [python] 20 | language: system 21 | 22 | - repo: local 23 | hooks: 24 | - id: isort 25 | name: isort 26 | entry: uv run isort --settings-path pyproject.toml 27 | types: [python] 28 | language: system 29 | 30 | - repo: local 31 | hooks: 32 | - id: black 33 | name: black 34 | entry: uv run black --config pyproject.toml 35 | types: [python] 36 | language: system 37 | 38 | - repo: local 39 | hooks: 40 | - id: lint 41 | name: lint 42 | entry: make lint 43 | types: [python] 44 | language: system 45 | pass_filenames: false 46 | -------------------------------------------------------------------------------- /docs/source/roadmap/roadmap.md: -------------------------------------------------------------------------------- 1 | 2 | # Roadmap 3 | 4 | The list below contains the functionality that contributors are planning to develop for Ryoma. 5 | 6 | * We welcome contribution to all items in the roadmap! 
7 | 8 | * **Data Sources** 9 | * [x] [Snowflake source](https://docs.ryoma.dev/reference/data-sources/snowflake) 10 | * [ ] [BigQuery source](https://docs.ryoma.dev/reference/data-sources/bigquery) 11 | * [x] [Parquet file source](https://docs.ryoma.dev/reference/data-sources/file) 12 | * [x] [Postgres (contrib plugin)](https://docs.ryoma.dev/reference/data-sources/postgres) 13 | 14 | * **Agents** 15 | * [x] [Python Agent](https://docs.ryoma.dev/reference/agents/python-agent) 16 | * [ ] [Java Agent](https://docs.ryoma.dev/reference/agents/java-agent) 17 | * [ ] [Go Agent](https://docs.ryoma.dev/reference/agents/go-agent) 18 | 19 | * **Tools** 20 | * [x] [Python tool](https://docs.ryoma.dev/reference/tools/python) 21 | * [x] [Pandas tool](https://docs.ryoma.dev/reference/tools/pandas) 22 | * [x] [Sql tool](https://docs.ryoma.dev/reference/tools/sql) 23 | * [ ] [Spark tool](https://docs.ryoma.dev/reference/tools/spark) -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: ["3.10", "3.11", "3.12"] 11 | 12 | defaults: 13 | run: 14 | working-directory: ./ 15 | 16 | steps: 17 | - uses: actions/checkout@v4 18 | - name: Set up Python ${{ matrix.python-version }} 19 | uses: actions/setup-python@v5.0.0 20 | with: 21 | python-version: ${{ matrix.python-version }} 22 | 23 | - name: Install uv 24 | run: make uv-download 25 | 26 | - name: Set up cache 27 | uses: actions/cache@v4.2.2 28 | with: 29 | path: .venv 30 | key: venv-${{ matrix.python-version }}-${{ hashFiles('pyproject.toml') }}-${{ hashFiles('poetry.lock') }} 31 | - name: Install dependencies 32 | run: | 33 | make install 34 | - name: Run style checks 35 | run: | 36 | make check-codestyle 37 | 38 | - name: Run unit tests 39 | run: | 40 | 
"""
Configuration for store backends.
"""

from typing import Any, Dict, Optional

from pydantic import BaseModel, Field


class StoreConfig(BaseModel):
    """Configuration for store backend."""

    type: str = Field(
        default="memory", description="Type of store: memory, postgres, redis"
    )

    connection_string: Optional[str] = Field(
        default=None, description="Connection string for database stores"
    )

    options: Dict[str, Any] = Field(
        default_factory=dict, description="Additional store-specific options"
    )

    def to_factory_params(self) -> Dict[str, Any]:
        """Convert config to parameters for StoreFactory.

        ``store_config`` is only included when there is something to pass:
        a connection string (merged with options) or bare options.
        """
        params: Dict[str, Any] = {"store_type": self.type}
        if self.connection_string:
            merged = {"connection_string": self.connection_string}
            merged.update(self.options)
            params["store_config"] = merged
        elif self.options:
            params["store_config"] = self.options
        return params
from typing import Optional

from ryoma_lab.models.embedding import Embedding
from ryoma_lab.states.base import BaseState


class AIState(BaseState):
    """UI state for the AI settings tab (embedding model, dimension, API key)."""

    tab_value: str = "agent"

    embedding: Optional[Embedding] = None

    selected_model: str = "gpt4all:all-MiniLM-L6-v2-f16"
    dimension: int = 512
    api_key: Optional[str] = ""

    def set_model(self, model: str):
        """Select an embedding model and rebuild the Embedding handle."""
        self.selected_model = model
        self.load_embedding()

    def set_dimension(self, dimension: str):
        """Parse the dimension input, falling back to 512 on empty or invalid values."""
        parsed = 512
        if dimension:
            try:
                parsed = int(dimension)
            except ValueError:
                parsed = 512
        self.dimension = parsed
        self.load_embedding()

    def set_api_key(self, api_key: str):
        """Store the API key and rebuild the Embedding handle."""
        self.api_key = api_key
        self.load_embedding()

    def load_embedding(self):
        """Materialize an Embedding object from the currently selected settings."""
        self.embedding = Embedding(
            model=self.selected_model,
            model_parameters={"api_key": self.api_key, "dimension": self.dimension},
        )

    def on_load(self):
        """Page-load hook: ensure an embedding exists for the current settings."""
        self.load_embedding()
from typing import Optional

from ryoma_ai.datasource.base import DataSource
from ryoma_lab.services.kernel.base import BaseKernel
from ryoma_lab.services.kernel.pythonkernel import PythonKernel
from ryoma_lab.services.kernel.sqlkernel import SqlKernel


class KernelService:
    """Factory that hands out execution kernels bound to an optional datasource."""

    def __init__(self, datasource: Optional[DataSource] = None):
        self.datasource = datasource

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Nothing to release; the context-manager form is kept for call-site symmetry.
        pass

    def create_kernel(self, kernel_type: str, **kwargs) -> BaseKernel:
        """Build a kernel of the given type ("sql" or "python").

        Raises:
            ValueError: if *kernel_type* is not supported.
        """
        factories = {"sql": SqlKernel, "python": PythonKernel}
        if kernel_type not in factories:
            raise ValueError(f"Unsupported kernel type: {kernel_type}")
        return factories[kernel_type](datasource=self.datasource, **kwargs)

    def set_datasource(self, datasource: DataSource):
        self.datasource = datasource

    def get_datasource(self) -> Optional[DataSource]:
        return self.datasource
class AgentProvider(Enum):
    """Registry of built-in agent types, keyed by the public agent name."""

    base = ChatAgent
    sql = SqlAgent
    pandas = PandasAgent
    pyarrow = ArrowAgent
    pyspark = SparkAgent
    python = PythonAgent
    embedding = EmbeddingAgent


def get_builtin_agents():
    """Return all built-in agent providers as a list of enum members."""
    return list(AgentProvider)


class AgentFactory:
    @staticmethod
    def create_agent(
        agent_type: str, *args, **kwargs
    ) -> Union[EmbeddingAgent, ChatAgent, WorkflowAgent]:
        """Instantiate the agent class registered under ``agent_type``.

        Falls back to ``ChatAgent`` when ``agent_type`` is empty or not a
        registered provider name. Positional and keyword arguments are
        forwarded to the agent constructor.
        """
        # Use __members__ rather than hasattr(): hasattr(AgentProvider, name)
        # is also True for ordinary class attributes such as "mro", and
        # AgentProvider[name] would then raise a KeyError instead of
        # falling back to ChatAgent.
        if agent_type and agent_type in AgentProvider.__members__:
            agent_class = AgentProvider[agent_type].value
        else:
            agent_class = ChatAgent
        return agent_class(*args, **kwargs)
@dataclass
class DataSourceRegistration:
    """Data source registration information.

    Attributes:
        id: Unique identifier of the registration.
        name: Human-readable data source name.
        type: Data source type (e.g. "postgres", "mysql").
        config: Connection/configuration options for the data source.
        created_at: Timestamp of when the registration was created.
        updated_at: Timestamp of the last update.
        is_active: Whether the data source is currently enabled.
        description: Optional free-form description.
        tags: Optional list of labels for grouping/filtering.
    """

    id: str
    name: str
    type: str
    config: Dict[str, Any]
    created_at: datetime
    updated_at: datetime
    is_active: bool = True
    description: Optional[str] = None
    tags: Optional[List[str]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a JSON-friendly dictionary for storage.

        Datetimes are serialized to ISO-8601 strings.
        """
        data = asdict(self)
        data["created_at"] = self.created_at.isoformat()
        data["updated_at"] = self.updated_at.isoformat()
        return data

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "DataSourceRegistration":
        """Create an instance from a dictionary loaded from storage.

        The input mapping is not mutated: a shallow copy is taken before the
        ISO timestamp strings are parsed back into datetimes (the original
        implementation overwrote the caller's dict in place).
        """
        data = dict(data)
        data["created_at"] = datetime.fromisoformat(data["created_at"])
        data["updated_at"] = datetime.fromisoformat(data["updated_at"])
        return cls(**data)
| 7 | 8 | 9 | ## Type of Change 10 | 11 | 12 | 13 | - [ ] 📚 Examples / docs / tutorials / dependencies update 14 | - [ ] 🔧 Bug fix (non-breaking change which fixes an issue) 15 | - [ ] 🥂 Improvement (non-breaking change which improves an existing feature) 16 | - [ ] 🚀 New feature (non-breaking change which adds functionality) 17 | - [ ] 💥 Breaking change (fix or feature that would cause existing functionality to change) 18 | - [ ] 🔐 Security fix 19 | 20 | ## Checklist 21 | 22 | 23 | 24 | - [ ] I've read the [`CODE_OF_CONDUCT.md`](https://github.com/ryoma/ryoma/blob/master/CODE_OF_CONDUCT.md) document. 25 | - [ ] I've read the [`CONTRIBUTING.md`](https://github.com/ryoma/ryoma/blob/master/CONTRIBUTING.md) guide. 26 | - [ ] I've updated the code style using `make codestyle`. 27 | - [ ] I've written tests for all new methods and classes that I created. 28 | - [ ] I've written the docstring in Google format for all the methods and classes that I used. 29 | -------------------------------------------------------------------------------- /docs/source/reference/agent/pandas.md: -------------------------------------------------------------------------------- 1 | # Pandas Agent 2 | 3 | The Pandas agent is an Ryoma agent that runs on the Pandas library. 4 | The Pandas agent can be used to ask questions in natural language and interact with Pandas DataFrames. 5 | 6 | ## Example 7 | 8 | pass Data Source to Pandas Agent and return result as a dataframe. 
Pass a data source to the Pandas agent and stream the result:

```python
from ryoma_ai.agent.pandas_agent import PandasAgent
from ryoma_ai.datasource.sqlite import SqliteDataSource
from ryoma_ai.prompt.base import BasicContextPromptTemplate

datasource = SqliteDataSource("sqlite:///data.db")
pandas_agent = (
    PandasAgent("gpt-3.5-turbo")
    .set_context_prompt(BasicContextPromptTemplate)
    .add_datasource(datasource)
)
pandas_agent.stream("Get the top 10 customers by purchase amount")
```

Add a DataFrame to the Pandas agent and ask it to analyze the data:

```python
from ryoma_ai.agent.pandas_agent import PandasAgent
import pandas as pd

df = pd.DataFrame({
    'customer_id': [1, 2, 3, 4, 5],
    'purchase_amount': [100, 200, 300, 400, 500]
})
pandas_agent = PandasAgent("gpt-3.5-turbo").add_dataframe(df)

pandas_agent.stream("I want to get the top customers who make the most purchases")
```
class MetadataSummarizationAgent(ChatAgent):
    """Chat agent that turns column/schema profiling stats into natural-language descriptions."""

    def summarize_column(self, column_name: str, profile: Dict[str, Any]) -> str:
        """Describe what a single column likely represents.

        Args:
            column_name: Name of the column being summarized.
            profile: Profiling stats for the column. Expected keys include
                "type", "null_ratio" and "distinct_count"; numeric columns may
                add "min"/"max"/"mean" and string columns may add
                "sample_values"/"min_length"/"max_length". All keys are
                treated as optional.

        Returns:
            The model-generated description of the column.
        """
        # Format null_ratio only when present: f"{None:.2f}" raises TypeError.
        null_ratio = profile.get("null_ratio")
        null_ratio_text = f"{null_ratio:.2f}" if null_ratio is not None else "unknown"
        prompt = (
            f"Column Name: {column_name}\n"
            f"Type: {profile.get('type')}\n"
            f"Null Ratio: {null_ratio_text}\n"
            f"Distinct Count: {profile.get('distinct_count')}\n"
        )
        if "min" in profile:
            # Assumes "max" and "mean" accompany "min" for numeric columns
            # (matches the profiler's output) — TODO confirm against profiler.
            prompt += f"Min: {profile['min']}, Max: {profile['max']}, Mean: {profile['mean']:.2f}\n"
        if "sample_values" in profile:
            prompt += f"Sample Values: {profile['sample_values']}\n"
        if "min_length" in profile:
            # Guarded separately from sample_values: a profile may carry
            # sample values without length stats, which previously raised
            # a KeyError here.
            prompt += f"Min Length: {profile['min_length']}, Max Length: {profile['max_length']}\n"

        prompt += "\nDescribe what this column likely represents."

        return self.chat(prompt).content

    def summarize_schema(
        self, schema_profile: Dict[str, Dict[str, Any]]
    ) -> Dict[str, str]:
        """Summarize every column in a schema profile.

        Returns a mapping of column name -> generated description.
        """
        return {
            column: self.summarize_column(column, profile)
            for column, profile in schema_profile.items()
        }
6 | 7 | ## 🎯 Quick Navigation 8 | 9 | | 🤖 Component | 📝 Description | 🔗 Link | 10 | |--------------|----------------|---------| 11 | | **Core API** | Base classes, stores, and configuration | [API Reference →](api/index.md) | 12 | | **Agents** | AI-powered data analysis agents | [Agents →](agent/index.md) | 13 | | **Data Sources** | Database and file connectors | [Data Sources →](data-sources/index.md) | 14 | | **Tools** | Specialized analysis tools | [Tools →](tool/index.md) | 15 | | **Models** | LLM integrations and configurations | [Models →](models/index.md) | 16 | | **Profiling** | Database metadata extraction | [Profiling →](profiling/index.md) | 17 | 18 | ## 🚀 Latest Features 19 | 20 | - **Enhanced SQL Agent** - Multi-step reasoning with safety validation 21 | - **ReFoRCE Agent** - State-of-the-art self-refinement capabilities 22 | - **Database Profiling** - Comprehensive metadata extraction with Ibis 23 | - **Advanced Tools** - Query validation, optimization, and explanation 24 | - **Safety Framework** - Configurable validation and security policies 25 | 26 | ```{toctree} 27 | :maxdepth: 2 28 | 29 | api/index 30 | agent/index 31 | data-sources/index 32 | tool/index 33 | models/index 34 | profiling/index 35 | ``` 36 | -------------------------------------------------------------------------------- /scripts/langchain_test.py: -------------------------------------------------------------------------------- 1 | from langchain.chains import create_sql_query_chain 2 | from langchain_community.tools import QuerySQLDataBaseTool 3 | from langchain_community.utilities import SQLDatabase 4 | from langchain_core.tools import tool 5 | from langchain_openai import ChatOpenAI 6 | 7 | 8 | @tool 9 | def multiply(a: int, b: int) -> int: 10 | """Multiply two integers together. 
11 | 12 | Args: 13 | a: First integer 14 | b: Second integer 15 | """ 16 | return a * b 17 | 18 | 19 | # print(json.dumps(convert_to_openai_tool(multiply), indent=2)) 20 | # 21 | # llm_with_tool = llm.bind( 22 | # tools=[convert_to_openai_tool(multiply)], 23 | # tool_choice={"type": "function", "function": {"name": "multiply"}}, 24 | # ) 25 | # print(llm_with_tool.invoke( 26 | # "what's five times four" 27 | # )) 28 | 29 | 30 | db = SQLDatabase.from_uri("") 31 | # print(db.dialect) 32 | # print(db.get_usable_table_names()) 33 | # db.run("SELECT * FROM orders LIMIT 10;") 34 | 35 | query_tool = QuerySQLDataBaseTool(db=db) 36 | llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0).bind_tools( 37 | [multiply, query_tool] 38 | ) 39 | chain = create_sql_query_chain(llm, db) 40 | response = chain.invoke( 41 | {"question": "the top 10 customers buying the most number of orders"} 42 | ) 43 | print(response) 44 | 45 | print(llm.invoke("the top 10 customers buying the most number of orders")) 46 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/states/tool.py: -------------------------------------------------------------------------------- 1 | import reflex as rx 2 | from ryoma_ai import tool 3 | from ryoma_lab.models.tool import Tool, ToolArg 4 | from ryoma_lab.states.ai import AIState 5 | from ryoma_lab.states.utils import ( 6 | get_model_classes, 7 | get_model_fields, 8 | get_model_fields_as_dict, 9 | ) 10 | 11 | 12 | class ToolState(AIState): 13 | tools: list[Tool] 14 | 15 | @rx.var 16 | def tool_names(self) -> list[str]: 17 | return [t.name for t in self.tools] 18 | 19 | def load_tools(self): 20 | self.tools = [] 21 | for t in get_model_classes(tool): 22 | name, cls = t 23 | description = get_model_fields(cls, "description") 24 | args_schema = get_model_fields(cls, "args_schema") 25 | args = get_model_fields_as_dict(args_schema) 26 | self.tools.append( 27 | Tool( 28 | name=name, 29 | description=description, 
30 | args=[ 31 | ToolArg( 32 | name=arg["name"], 33 | required=arg["required"], 34 | description=arg["description"], 35 | ) 36 | for arg in args.values() 37 | ], 38 | ) 39 | ) 40 | 41 | def on_load(self): 42 | self.load_tools() 43 | -------------------------------------------------------------------------------- /docs/source/architecture/enhanced-sql-agent-workflow.mmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: Enhanced SQL Agent Workflow 3 | --- 4 | graph TD 5 | %% Main Workflow 6 | A[🤔 User Question] --> B[🔍 Analyze Question] 7 | B --> C[🔗 Schema Linking] 8 | C --> D[📋 Query Planning] 9 | D --> E[⚡ Generate SQL] 10 | E --> F[🛡️ Validate Safety] 11 | 12 | %% Safety Check Branch 13 | F --> G{🔒 Safety Check} 14 | G -->|✅ Pass| H[▶️ Execute Query] 15 | G -->|❌ Fail| M[📝 Format Response] 16 | 17 | %% Execution Branch 18 | H --> I{📊 Execution Result} 19 | I -->|✅ Success| M[📝 Format Response] 20 | I -->|❌ Error| J[🛠️ Handle Error] 21 | I -->|🔄 Retry Needed| E 22 | 23 | %% Error Handling Branch 24 | J --> K{🤔 Should Retry?} 25 | K -->|🔄 Yes, Retry < Max| E 26 | K -->|🛑 No, Give Up| M 27 | 28 | %% Final Output 29 | M --> N[📝 Final Answer] 30 | 31 | %% Styling 32 | classDef startEnd fill:#e3f2fd,stroke:#1976d2,stroke-width:3px,color:#000 33 | classDef process fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px,color:#000 34 | classDef decision fill:#fff8e1,stroke:#f57c00,stroke-width:2px,color:#000 35 | classDef error fill:#ffebee,stroke:#d32f2f,stroke-width:2px,color:#000 36 | classDef success fill:#e8f5e8,stroke:#388e3c,stroke-width:2px,color:#000 37 | 38 | class A,N startEnd 39 | class B,C,D,E,H,M process 40 | class G,I,K decision 41 | class J error 42 | class F success 43 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/components/upload.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | 
def upload_render(files: list[str], handle_upload: Any):
    """Render a drag-and-drop file upload area plus a grid of uploaded file names.

    Args:
        files: Names of already-uploaded files to display below the drop zone.
        handle_upload: Event handler factory invoked with the pending uploads
            (``rx.upload_files``) when files are dropped or selected.
    """
    return rx.vstack(
        rx.upload(
            rx.vstack(
                rx.button(
                    "Select File",
                ),
                rx.text("Drag and drop files here or click to select files"),
                align="center",
            ),
            # Must match the upload_id passed to rx.upload_files below.
            id="upload2",
            multiple=True,
            # Whitelist of accepted MIME types and their file extensions.
            accept={
                "application/pdf": [".pdf"],
                "image/png": [".png"],
                "image/jpeg": [".jpg", ".jpeg"],
                "image/gif": [".gif"],
                "image/webp": [".webp"],
                "text/html": [".html", ".htm"],
            },
            max_files=5,
            disabled=False,
            on_drop=handle_upload(rx.upload_files(upload_id="upload2")),
            border=styles.border,
            padding="5em",
        ),
        # Two-column grid listing each uploaded file name.
        rx.grid(
            rx.foreach(
                files,
                lambda file: rx.vstack(
                    rx.text(file),
                ),
            ),
            columns="2",
            spacing="1",
        ),
        padding="5em",
    )
return data_source 32 | 33 | 34 | def test_execute_query(mock_sql_data_source): 35 | with patch("ryoma_ai.datasource.sql.SqlDataSource.query") as mock_execute: 36 | mock_execute.return_value = "success" 37 | results = mock_sql_data_source.query("SELECT * FROM table") 38 | assert results == "success" 39 | 40 | 41 | def test_sql_datasource_field_exists(mock_sql_data_source): 42 | assert hasattr(mock_sql_data_source, "database") 43 | assert hasattr(mock_sql_data_source, "db_schema") 44 | -------------------------------------------------------------------------------- /assets/github.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/services/kernel/base.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import traceback 3 | from abc import abstractmethod 4 | from typing import Any, Dict, Optional 5 | 6 | from ryoma_ai.datasource.base import DataSource 7 | 8 | 9 | class BaseKernel: 10 | datasource: DataSource 11 | 12 | def __init__(self, datasource: Optional[DataSource] = None, **kwargs): 13 | self.datasource = datasource 14 | 15 | async def execute_code(self, code: str) -> Dict[str, Any]: 16 | loop = asyncio.get_event_loop() 17 | return await loop.run_in_executor(None, self.execute, code) 18 | 19 | @abstractmethod 20 | def execute(self, code: str) -> Dict[str, Any]: 21 | pass 22 | 23 | def _create_error_response(self, error: Exception) -> Dict[str, Any]: 24 | return { 25 | "output_type": "error", 26 | "ename": type(error).__name__, 27 | "evalue": str(error), 28 | "traceback": self._format_traceback(error), 29 | } 30 | 31 | def _create_success_response(self, result: Any) -> Dict[str, Any]: 32 | return { 33 | "output_type": "execute_result", 34 | "data": {"text/plain": str(result)} if result is not None else None, 35 | } 36 | 
class ReactFlowLib(rx.Component):
    """Base class for components wrapping the `reactflow` React library.

    Injects the reactflow stylesheet so the flow canvas renders correctly.
    """

    library = "reactflow"

    def _get_custom_code(self) -> str:
        return """import 'reactflow/dist/style.css';
"""


class ReactFlow(ReactFlowLib):
    """The main React Flow canvas component."""

    tag = "ReactFlow"

    # Graph content: lists of node/edge descriptors in reactflow's format.
    nodes: rx.Var[List[Dict[str, Any]]]

    edges: rx.Var[List[Dict[str, Any]]]

    # View/behavior toggles.
    fit_view: rx.Var[bool]

    nodes_draggable: rx.Var[bool]

    nodes_connectable: rx.Var[bool]

    nodes_focusable: rx.Var[bool]

    # Event handlers receive the raw reactflow event payload.
    on_nodes_change: rx.EventHandler[lambda e0: [e0]]

    on_edges_change: rx.EventHandler[lambda e0: [e0]]

    on_connect: rx.EventHandler[lambda e0: [e0]]

    # NOTE: the _get_custom_code override that duplicated the base-class
    # implementation verbatim was removed; the inherited version is used.


class Background(ReactFlowLib):
    """Background grid/dots layer for the flow canvas."""

    tag = "Background"

    color: rx.Var[str]

    gap: rx.Var[int]

    size: rx.Var[int]

    variant: rx.Var[str]


class Controls(ReactFlowLib):
    """Zoom/fit/lock control panel."""

    tag = "Controls"


class ApplyNodeChanges(ReactFlowLib):
    tag = "applyNodeChanges"


react_flow = ReactFlow.create
background = Background.create
controls = Controls.create
apply_node_changes = ApplyNodeChanges.create
-------------------------------------------------------------------------------- 1 | import openai 2 | import openai_responses 3 | import pytest 4 | from openai_responses import OpenAIMock 5 | from ryoma_ai.agent.chat_agent import ChatAgent 6 | 7 | from tests.unit_tests.test_utils import ( 8 | create_chat_completion_response_stream, 9 | mock_chat_response, 10 | ) 11 | 12 | 13 | @pytest.fixture(autouse=True) 14 | def mock_openai_api_key(monkeypatch): 15 | monkeypatch.setenv("OPENAI_API_KEY", "foo") 16 | 17 | 18 | @openai_responses.mock() 19 | def test_create_chat_completion_stream(openai_mock: OpenAIMock): 20 | openai_mock.chat.completions.create.response = ( 21 | create_chat_completion_response_stream 22 | ) 23 | 24 | client = openai.Client(api_key="sk-fake123") 25 | completion = client.chat.completions.create( 26 | model="gpt-4o", 27 | messages=[ 28 | {"role": "system", "content": "You are a helpful assistant."}, 29 | {"role": "user", "content": "Hello!"}, 30 | ], 31 | stream=True, 32 | ) 33 | 34 | received_chunks = 0 35 | 36 | for chunk in completion: 37 | received_chunks += 1 38 | assert chunk.id 39 | 40 | assert received_chunks == 3 41 | 42 | 43 | @pytest.fixture 44 | def agent(): 45 | return ChatAgent("gpt-3.5-turbo") 46 | 47 | 48 | @openai_responses.mock() 49 | def test_chat(agent, openai_mock: OpenAIMock): 50 | openai_mock.chat.completions.create.response = mock_chat_response("Hello, world!") 51 | chat_response = agent.invoke("Hello, world!", display=False) 52 | assert chat_response.content == "Hello, world!" 
def model_config_render() -> rx.Component:
    """Render API-key and dimension inputs for the selected embedding model.

    NOTE(review): this appears to duplicate the API Key / Dimension rows of
    embedding_component below — confirm it is still referenced before removing.
    """
    return rx.vstack(
        rx.hstack(
            rx.text("API Key"),
            rx.input(
                value=AIState.api_key,
                on_change=AIState.set_api_key,
            ),
        ),
        rx.hstack(
            rx.text("Dimension"),
            rx.input(
                value=AIState.dimension,
                on_change=AIState.set_dimension,
            ),
        ),
        width="100%",
        spacing="4",
    )


def embedding_component() -> rx.Component:
    """Render the embedding settings panel: model selector, API key, dimension."""
    return rx.vstack(
        rx.hstack(
            rx.text("Model", width="100px"),
            embedding_model_selector(
                AIState.selected_model,
                AIState.set_model,
            ),
        ),
        rx.hstack(
            rx.text("API Key", width="100px"),
            rx.input(
                value=AIState.api_key,
                on_change=AIState.set_api_key,
                # Masked input so the key is not echoed on screen.
                type="password",
            ),
        ),
        rx.hstack(
            rx.text("Dimension", width="100px"),
            rx.input(
                value=AIState.dimension,
                on_change=AIState.set_dimension,
            ),
        ),
        width="100%",
        padding_x="2em",
    )
"execute_result", "dataframe", "error"] = "dataframe" 16 | dataframe: pd.DataFrame 17 | 18 | 19 | class StreamOutput(CellOutput): 20 | output_type: Literal["stream", "execute_result", "dataframe", "error"] = "stream" 21 | text: str 22 | 23 | 24 | class ExecuteResultOutput(CellOutput): 25 | output_type: Literal["stream", "execute_result", "dataframe", "error"] = ( 26 | "execute_result" 27 | ) 28 | execute_result: Union[dict[str, Any], None] = None 29 | 30 | 31 | class ErrorOutput(CellOutput): 32 | output_type: Literal["stream", "execute_result", "dataframe", "error"] = "error" 33 | 34 | 35 | class UnknownOutput(ErrorOutput): 36 | text: str = "Unknown output type" 37 | 38 | 39 | class Cell(rx.Base): 40 | cell_type: str = "code" 41 | content: str = "" 42 | output: List[ 43 | Union[ 44 | StreamOutput, 45 | ExecuteResultOutput, 46 | DataframeOutput, 47 | ErrorOutput, 48 | UnknownOutput, 49 | ] 50 | ] = [] 51 | tool_id: Optional[str] = None 52 | execute_function: Optional[Callable[[str, str], Coroutine[Any, Any, None]]] = None 53 | update_function: Optional[Callable[[str, str], None]] = None 54 | -------------------------------------------------------------------------------- /packages/ryoma_ai/README.md: -------------------------------------------------------------------------------- 1 | # Ryoma 2 | 3 | Ryoma lib is the core component of the project which includes: 4 | - **Data Sources** that can be used to fetch data from different sources 5 | - **Agents** that can be used to process data with AI models 6 | - **Tools** that can be used by agent to process data 7 | 8 | ## Installation 9 | 10 | ### Basic Installation 11 | ```bash 12 | pip install ryoma_ai 13 | ``` 14 | 15 | ### Installing with Optional Dependencies 16 | 17 | Ryoma AI uses lazy imports for datasource dependencies, so you only need to install the dependencies for the datasources you plan to use: 18 | 19 | ```bash 20 | # For PostgreSQL support 21 | pip install ryoma_ai[postgres] 22 | 23 | # For MySQL 
support 24 | pip install ryoma_ai[mysql] 25 | 26 | # For Snowflake support 27 | pip install ryoma_ai[snowflake] 28 | 29 | # For BigQuery support 30 | pip install ryoma_ai[bigquery] 31 | 32 | # For DuckDB support 33 | pip install ryoma_ai[duckdb] 34 | 35 | # For DynamoDB support 36 | pip install ryoma_ai[dynamodb] 37 | 38 | # For Apache Iceberg support 39 | pip install ryoma_ai[iceberg] 40 | 41 | # For PySpark support 42 | pip install ryoma_ai[pyspark] 43 | 44 | # Multiple datasources 45 | pip install ryoma_ai[postgres,mysql,duckdb] 46 | ``` 47 | 48 | ## Usage 49 | 50 | ```python 51 | from ryoma_ai.datasource.postgres import PostgresDataSource 52 | from ryoma_ai.agent.sql import SqlAgent 53 | 54 | datasource = PostgresDataSource("postgresql://user:password@localhost/db") 55 | sql_agent = SqlAgent("gpt-3.5-turbo").add_datasource(datasource) 56 | sql_agent.stream("Get the top 10 rows from the data source") 57 | ``` 58 | 59 | ## Documentation 60 | Visit the [documentation](https://project-ryoma.github.io/ryoma/) for more information. 61 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/tool/python_tool.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Any, Dict, Sequence, Type, Union 3 | 4 | from IPython import get_ipython 5 | from IPython.core.interactiveshell import ExecutionResult, InteractiveShell 6 | from langchain_core.tools import BaseTool 7 | from pydantic import BaseModel, Field 8 | 9 | log = logging.getLogger(__name__) 10 | 11 | 12 | class PythonInput(BaseModel): 13 | script: str = Field(description="python script") 14 | 15 | 16 | class PythonTool(BaseTool): 17 | """Tool for running python script in an IPython environment.""" 18 | 19 | name: str = "run_ipython_script_tool" 20 | description: str = """ 21 | Execute a python script in an IPython environment and return the result of the last expression. 
def get_embedding_client(
    selected_model: str, model_parameters: Optional[dict[str, str]] = None
) -> Embeddings:
    """Load a LangChain Embeddings instance for the given model identifier.

    Args:
        selected_model: Provider/model identifier understood by
            load_model_provider.
        model_parameters: Optional provider-specific parameters.
    """
    logging.info(f"Creating embedding client for {selected_model}")
    return load_model_provider(
        selected_model,
        "embedding",
        model_parameters=model_parameters,
    )


class EmbeddingClient(ABC):
    """Minimal embedding interface that decouples callers from LangChain."""

    @abstractmethod
    def embed(self, text: str) -> List[float]:
        """Embed a single text into a vector."""
        pass

    def embed_batch(self, texts: List[str]) -> List[List[float]]:
        """Embed many texts; default implementation embeds one at a time.

        Subclasses may override with a provider-native batch call.
        """
        return [self.embed(t) for t in texts]

    @abstractmethod
    def langchain(self) -> Embeddings:
        """Return the underlying LangChain Embeddings object."""
        pass
class BigqueryDataSource(SqlDataSource):
    """SQL data source backed by Google BigQuery (via the Ibis backend)."""

    project_id: str = Field(..., description="Bigquery current_store ID")
    dataset_id: str = Field(..., description="Bigquery dataset ID")
    credentials: Optional[str] = Field(None, description="Path to the credentials file")

    def _connect(self, **kwargs) -> BaseBackend:
        """Open an Ibis BigQuery connection for this project/dataset.

        Extra keyword arguments are forwarded to ibis.bigquery.connect.
        """
        return ibis.bigquery.connect(
            project_id=self.project_id,
            dataset_id=self.dataset_id,
            credentials=self.credentials,
            **kwargs,
        )

    def crawl_catalog(self, loader: Loader, where_clause_suffix: Optional[str] = ""):
        """Crawl BigQuery table metadata into the given databuilder loader.

        Args:
            loader: Databuilder loader that receives the extracted metadata.
            where_clause_suffix: Accepted for interface parity with other
                data sources; not used by the BigQuery extractor here.
        """
        # Imported lazily so the databuilder BigQuery extras are only
        # required when catalog crawling is actually used.
        from databuilder.extractor.bigquery_metadata_extractor import (
            BigQueryMetadataExtractor,
        )

        logging.info("Crawling data catalog from Bigquery")
        # The extractor config must MAP the project-id config key to the
        # actual project id. The original code built a one-element set
        # literal (a key expression with no value), which ConfigFactory
        # cannot interpret as configuration.
        job_config = ConfigFactory.from_dict(
            {
                "extractor.bigquery_table_metadata.{}".format(
                    BigQueryMetadataExtractor.PROJECT_ID_KEY
                ): self.project_id
            }
        )
        job = DefaultJob(
            conf=job_config,
            task=DefaultTask(extractor=BigQueryMetadataExtractor(), loader=loader),
        )

        job.launch()
actions/deploy-pages@d6db90164ac5ed86f2b6aed7e0febac5b3c0c03e # v4.0.5 -------------------------------------------------------------------------------- /example_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "gpt-4o", 3 | "mode": "enhanced", 4 | 5 | "meta_store": { 6 | "type": "postgres", 7 | "connection_string": "postgresql://user:password@localhost:5432/metadata_db", 8 | "options": { 9 | "timeout": 30, 10 | "pool_size": 10 11 | } 12 | }, 13 | 14 | "vector_store": { 15 | "type": "pgvector", 16 | "collection_name": "ryoma_vectors", 17 | "dimension": 768, 18 | "distance_metric": "cosine", 19 | "extra_configs": { 20 | "connection_string": "postgresql://postgres:postgres@localhost:5432/vectordb", 21 | "host": "localhost", 22 | "port": 5432, 23 | "database": "vectordb", 24 | "user": "postgres", 25 | "password": "postgres", 26 | "distance_strategy": "cosine" 27 | } 28 | }, 29 | 30 | "datasources": [ 31 | { 32 | "name": "default", 33 | "type": "postgres", 34 | "host": "localhost", 35 | "port": 5432, 36 | "database": "postgres", 37 | "user": "postgres", 38 | "password": "password" 39 | }, 40 | { 41 | "name": "analytics_db", 42 | "type": "mysql", 43 | "host": "analytics.company.com", 44 | "port": 3306, 45 | "database": "analytics", 46 | "user": "analytics_user", 47 | "password": "analytics_password" 48 | }, 49 | { 50 | "name": "warehouse", 51 | "type": "snowflake", 52 | "account": "your_account", 53 | "user": "warehouse_user", 54 | "password": "warehouse_password", 55 | "database": "warehouse", 56 | "schema": "public", 57 | "warehouse": "compute_wh" 58 | } 59 | ], 60 | 61 | "agent": { 62 | "auto_approve_all": false, 63 | "retry_count": 3, 64 | "timeout_seconds": 300 65 | } 66 | } -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/datasource/base.py: -------------------------------------------------------------------------------- 1 | from abc import 
class DataSource(BaseModel, ABC):
    """Abstract base class for all Ryoma data sources.

    Concrete subclasses (SQL, file, NoSQL, ...) implement catalog retrieval,
    catalog crawling, prompt rendering and table profiling. The pydantic
    config allows arbitrary and extra attributes so subclasses can carry
    backend-specific state.
    """

    model_config = {
        "arbitrary_types_allowed": True,
        "extra": "allow",
    }

    def __init__(self, type: str, **kwargs: Any):
        """Initialize the data source.

        Args:
            type: Short identifier of the data source kind (e.g. ``"file"``).
            **kwargs: Extra attributes forwarded to the pydantic constructor.
        """
        super().__init__(**kwargs)
        # NOTE(review): ``type`` is not a declared field and is assigned after
        # pydantic initialization; this relies on ``extra="allow"`` permitting
        # attribute assignment for undeclared fields — confirm this holds for
        # the pydantic version in use.
        self.type = type

    @abstractmethod
    def get_catalog(self, **kwargs: Dict[str, Any]) -> Catalog:
        """Return the catalog (schemas/tables/columns) of this data source."""
        raise NotImplementedError("get_catalog is not implemented for this data source")

    @abstractmethod
    def crawl_catalog(
        self, loader: Loader, **kwargs: Dict[str, Any]
    ) -> Optional[Catalog]:
        """Crawl catalog metadata and feed it into the given databuilder loader."""
        raise NotImplementedError(
            "crawl_catalog is not implemented for this data source."
        )

    @abstractmethod
    def prompt(self, schema: Optional[str] = None, table: Optional[str] = None) -> str:
        """Render a prompt snippet describing this data source for an LLM."""
        raise NotImplementedError("prompt is not implemented for this data source.")

    @abstractmethod
    def profile_table(self, table_name: str, **kwargs: Dict[str, Any]) -> dict:
        """
        Profile a table and return its metadata.

        Args:
            table_name (str): The name of the table to profile.
            **kwargs: Additional parameters for profiling.

        Returns:
            dict: A dictionary containing the table's metadata.
        """
        raise NotImplementedError(
            "profile_table is not implemented for this data source."
        )
/docs/source/reference/api/index.md: -------------------------------------------------------------------------------- 1 | # 🔧 API Reference 2 | 3 | Complete API documentation for Ryoma's core components and interfaces. 4 | 5 | ## 🎯 Core APIs 6 | 7 | | 🧩 Component | 📝 Description | 🔗 Link | 8 | |--------------|----------------|---------| 9 | | **Core API** | Base agents, stores, and configuration | [Core API →](core.md) | 10 | 11 | ## 🏗️ Architecture APIs 12 | 13 | The API reference covers Ryoma's unified three-tier architecture: 14 | 15 | ### 1. **Agent Layer** 16 | - Base agent functionality and common interfaces 17 | - Agent factory and creation patterns 18 | - Multi-agent routing and coordination 19 | 20 | ### 2. **Store Layer** 21 | - Metadata store management 22 | - Vector store operations 23 | - Unified store coordination 24 | 25 | ### 3. **Data Layer** 26 | - Data source connections and management 27 | - Catalog indexing and search 28 | - Query execution and validation 29 | 30 | ## 🚀 Quick Examples 31 | 32 | ### Agent Creation 33 | ```python 34 | from ryoma_ai.agent.factory import AgentFactory 35 | 36 | # Create any agent type 37 | agent = AgentFactory.create_agent( 38 | agent_type="sql", 39 | model="gpt-4o", 40 | datasource=datasource, 41 | store=meta_store, 42 | vector_store=vector_store 43 | ) 44 | ``` 45 | 46 | ### Store Management 47 | ```python 48 | from ryoma_ai.store.store_factory import StoreFactory 49 | 50 | # Create unified stores 51 | meta_store = StoreFactory.create_store( 52 | store_type="postgres", 53 | connection_string="postgresql://localhost:5432/metadata" 54 | ) 55 | ``` 56 | 57 | ### Catalog Operations 58 | ```python 59 | # Search and index operations 60 | agent.index_datasource(datasource, level="table") 61 | catalog = agent.search_catalogs("customer data", top_k=10) 62 | ``` 63 | 64 | ```{toctree} 65 | :maxdepth: 2 66 | 67 | core 68 | ``` -------------------------------------------------------------------------------- 
def download_model(model_name="Llama-3.2-1B-Instruct-Q4_0.gguf"):
    """Download a GPT4All model."""
    try:
        from gpt4all import GPT4All

        print(f"Downloading GPT4All model: {model_name}")
        print("This may take a few minutes depending on your internet connection...")

        # Skip the download when the model file is already cached locally.
        target_path = Path.home() / ".cache" / "gpt4all" / model_name
        if target_path.exists():
            print(f"Model {model_name} already exists at {target_path}")
            return True

        # Instantiating GPT4All triggers the actual download.
        llm = GPT4All(model_name)

        print(f"Model {model_name} downloaded successfully!")
        print(f"Model path: {target_path}")

        # Smoke-test the freshly downloaded model.
        print("Testing model...")
        reply = llm.generate("Hello, how are you?", max_tokens=50)
        print(f"Test response: {reply}")

        return True

    except ImportError:
        print("GPT4All not available. Please install with: pip install gpt4all")
        return False
    except Exception as e:
        print(f"Error downloading model: {e}")
        return False
class FileConfig:
    """Placeholder for file data source configuration options."""

    pass


class FileDataSource(DataSource):
    """Data source that reads a single local file (csv, parquet, or json)."""

    type: str = "file"
    file_path: str
    file_format: str
    file_name: str

    # NOTE(review): the DataSource base declares abstract methods
    # (crawl_catalog, prompt, profile_table) that are not implemented here —
    # confirm whether instantiation is expected to work or stubs are needed.

    def __init__(
        self,
        file_path: str,
        file_format: str,
        file_name: Optional[str] = None,
        **kwargs,
    ):
        """Create a file-backed data source.

        Args:
            file_path: Path to the file on disk.
            file_format: One of ``"csv"``, ``"parquet"``, ``"json"``.
            file_name: Optional display name; defaults to ``file_path``.
            **kwargs: Ignored, kept for signature compatibility.
        """
        # BUG FIX: the previous implementation assigned the attributes
        # *before* calling ``super().__init__`` — invalid on a pydantic model,
        # which rejects attribute assignment prior to initialization — and
        # never passed the declared fields to the base constructor, so
        # validation of ``file_path``/``file_format``/``file_name`` failed.
        super().__init__(
            name=file_name or file_path,
            type="file",
            file_path=file_path,
            file_format=file_format,
            file_name=file_name or file_path,
        )

    def get_catalog(self, **kwargs) -> Table:
        """Build a single-table catalog entry from the file's Arrow schema."""
        table_schema = self.to_arrow(**kwargs).schema
        return Table(
            table_name=self.file_name,
            table_columns=[
                # BUG FIX: ``str(schema.field(name))`` renders "name: type";
                # only the column *type* belongs in ``column_type``.
                {
                    "column_name": name,
                    "column_type": str(table_schema.field(name).type),
                }
                for name in table_schema.names
            ],
        )

    def to_arrow(self, **kwargs) -> pa.Table:
        """Load the file into a ``pyarrow.Table``.

        Raises:
            NotImplementedError: If ``file_format`` is not supported.
        """
        if self.file_format == "csv":
            from pyarrow.csv import read_csv

            return read_csv(self.file_path, **kwargs)
        elif self.file_format == "parquet":
            from pyarrow.parquet import read_table

            return read_table(self.file_path, **kwargs)
        elif self.file_format == "json":
            from pyarrow.json import read_json

            return read_json(self.file_path, **kwargs)
        else:
            raise NotImplementedError(f"FileFormat is unsupported: {self.file_format}")

    def to_pandas(self, **kwargs):
        """Load the file as a pandas DataFrame.

        BUG FIX: forwards ``**kwargs`` to :meth:`to_arrow` (previously the
        keyword arguments were silently ignored).
        """
        return self.to_arrow(**kwargs).to_pandas()
def modal() -> rx.Component:
    """A modal to create a new workspace."""
    # Header: title plus a close icon that toggles the modal state.
    close_icon = rc.icon(
        tag="close",
        font_size="sm",
        on_click=ChatState.toggle_modal,
        color="#fff8",
        _hover={"color": "#fff"},
        cursor="pointer",
    )
    header = rc.modal_header(
        rc.hstack(
            rc.text("Create new workspace"),
            close_icon,
            align_items="center",
            justify_content="space-between",
        )
    )
    # Body: input field capturing the new workspace name on blur.
    body = rc.modal_body(
        rc.input(
            placeholder="Type something...",
            on_blur=ChatState.set_new_chat_name,
            bg="#222",
            border_color="#fff3",
            _placeholder={"color": "#fffa"},
        ),
    )
    # Footer: the create action button.
    footer = rc.modal_footer(
        rc.button(
            "Create",
            bg="#5535d4",
            box_shadow="md",
            px="4",
            py="2",
            h="auto",
            _hover={"bg": "#4c2db3"},
            on_click=ChatState.create_chat,
        ),
    )
    return rc.modal(
        rc.modal_overlay(
            rc.modal_content(
                header,
                body,
                footer,
                bg="#222",
                color="#fff",
            ),
        ),
        is_open=ChatState.modal_open,
    )
class PandasAgent(WorkflowAgent):
    """Workflow agent that interacts with pandas DataFrames via a PandasTool."""

    description: str = (
        "A pandas agent that can use pandas tools to interact with pandas DataFrames."
    )

    def __init__(self, model: str, model_parameters: Optional[Dict] = None):
        """Create the agent with a PandasTool bound to the given model.

        Args:
            model: Model identifier passed to the underlying WorkflowAgent.
            model_parameters: Optional model configuration parameters.
        """
        super().__init__(
            [
                PandasTool(),
            ],
            model,
            model_parameters,
        )

    def add_dataframe(
        self,
        dataframe: DataFrame,
        df_id: Optional[str] = None,
    ) -> "PandasAgent":
        """
        Register a DataFrame as a resource, update the prompt context and tool script context.

        Args:
            dataframe: The pandas DataFrame to register.
            df_id: Optional custom name for the DataFrame.

        Returns:
            self (for chaining)
        """
        # Register DataFrame in the agent's registry
        obj_id = self.register_resource(dataframe)
        df_name = df_id or f"df_{obj_id}"

        # Add prompt context (note: dataframe.info() prints, we capture as string)
        buffer = io.StringIO()
        dataframe.info(buf=buffer)
        metadata_str = buffer.getvalue()

        self.add_prompt(
            f"""
        dataframe name: {df_name}
        dataframe metadata:\n{metadata_str}
        """
        )

        # Inject into PythonTool script context
        for tool in self.tools:
            if isinstance(tool, PythonTool):
                tool.update_script_context(script_context={df_name: dataframe})

        return self
def get_postgres_datasource() -> PostgresDataSource:
    """Build a PostgresDataSource from POSTGRES_* environment variables."""
    return PostgresDataSource(
        host=os.getenv("POSTGRES_HOST", "localhost"),
        # BUG FIX: os.getenv returns a *string* when the variable is set,
        # while the fallback was the int 5432 — coerce so the port type is
        # consistent regardless of the environment.
        port=int(os.getenv("POSTGRES_PORT", 5432)),
        database=os.getenv("POSTGRES_DB", "postgres"),
        user=os.getenv("POSTGRES_USER"),
        password=os.getenv("POSTGRES_PASSWORD"),
        db_schema=os.getenv("POSTGRES_SCHEMA", "public"),
    )


postgres_db = get_postgres_datasource()


def run_pandas() -> None:
    """Demo: ask a PandasAgent questions about an in-memory DataFrame."""
    pandas_agent = PandasAgent("gpt-3.5-turbo")
    df = pd.DataFrame(
        {
            "artist": ["Artist A", "Artist B", "Artist C", "Artist A", "Artist B"],
            "album": ["Album 1", "Album 2", "Album 3", "Album 4", "Album 5"],
        }
    )
    pandas_agent.add_dataframe(df)
    # BUG FIX: corrected the "artits" typo in the question sent to the model.
    pandas_agent.invoke("show me the artists with the most albums in descending order")
    pandas_agent.invoke(tool_mode=ToolMode.ONCE)


def run_sql_rag() -> None:
    """Demo: SQL agent with catalog indexing and vector-store retrieval."""
    sql_agent = SqlAgent(
        "gpt-3.5-turbo",
        embedding={"model": "text-embedding-3-small"},
        vector_store={
            "type": "pgvector",
        },
    )
    sql_agent.index_datasource(postgres_db, level="table")
    catalog = sql_agent.search_catalogs(
        "I want to get the top 10 artists with the most albums in descending order",
        top_k=3,
    )
    sql_agent.add_prompt(catalog.prompt)
    sql_agent.invoke("show me the tables in the database")


def run_sql() -> None:
    """Demo: plain SQL agent with the datasource prompt inlined."""
    sql_agent = SqlAgent("gpt-3.5-turbo")
    sql_agent.add_prompt(postgres_db.prompt())
    sql_agent.invoke("show me the tables in the database")


run_sql()
class StoreException(Exception):
    """Base exception for all store-related errors."""

    def __init__(self, message: str, cause: Optional[Exception] = None):
        # Keep the triggering exception around so callers can inspect the
        # root cause without relying on __context__.
        super().__init__(message)
        self.cause = cause


class DataSourceNotFoundError(StoreException):
    """Raised when a requested data source is not found in the store."""

    def __init__(self, data_source_id: str, cause: Optional[Exception] = None):
        super().__init__(f"Data source with ID '{data_source_id}' not found", cause)
        self.data_source_id = data_source_id


class CatalogNotFoundError(StoreException):
    """Raised when a requested catalog is not found in the store."""

    def __init__(
        self,
        catalog_id: str,
        data_source_id: Optional[str] = None,
        cause: Optional[Exception] = None,
    ):
        # Mention the owning data source only when one was supplied.
        scope = f" for data source '{data_source_id}'" if data_source_id else ""
        super().__init__(f"Catalog '{catalog_id}' not found{scope}", cause)
        self.catalog_id = catalog_id
        self.data_source_id = data_source_id


class DataSourceConnectionError(StoreException):
    """Raised when data source connection fails."""

    def __init__(
        self,
        data_source_id: str,
        connection_error: str,
        cause: Optional[Exception] = None,
    ):
        super().__init__(
            f"Failed to connect to data source '{data_source_id}': {connection_error}",
            cause,
        )
        self.data_source_id = data_source_id
        self.connection_error = connection_error
def upgrade() -> None:
    """Apply this revision: create ``documentproject`` and drop ``vectorstore``."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table(
        "documentproject",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("project_name", sqlmodel.sql.sqltypes.AutoString(), nullable=False),
        sa.Column("description", sqlmodel.sql.sqltypes.AutoString(), nullable=True),
        sa.Column("document_count", sa.Integer(), nullable=False),
        sa.Column("created_at", sa.DateTime(), nullable=True),
        sa.Column("updated_at", sa.DateTime(), nullable=True),
        sa.Column("is_active", sa.Boolean(), nullable=False),
        sa.PrimaryKeyConstraint("id"),
    )
    op.drop_table("vectorstore")
    # ### end Alembic commands ###


def downgrade() -> None:
    """Revert this revision: recreate ``vectorstore`` and drop ``documentproject``.

    NOTE(review): the recreated ``vectorstore`` uses generic VARCHAR columns
    from the autogenerated diff — confirm it matches the original table
    definition before relying on a downgrade in production.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table(
        "vectorstore",
        sa.Column("id", sa.INTEGER(), nullable=False),
        sa.Column("project_name", sa.VARCHAR(), nullable=False),
        sa.Column("online_store", sa.VARCHAR(), nullable=False),
        sa.Column("online_store_configs", sa.VARCHAR(), nullable=True),
        sa.Column("offline_store", sa.VARCHAR(), nullable=False),
        sa.Column("offline_store_configs", sa.VARCHAR(), nullable=True),
        sa.PrimaryKeyConstraint("id"),
    )
    op.drop_table("documentproject")
    # ### end Alembic commands ###
10 | 11 | 12 | ## backlogs 13 | 1. the impl should be able to index and search the data sources / catalog in the store, so that the agent won't need to load all the datasources / catalog every time. 14 | 2. update the documentation to reflect the changes made in the system. 15 | 3. optimize and add more tests. 16 | 4. fix the mypy for the entire project ryoma_ai 17 | 5. fix the mypy for the entire project ryoma_lab 18 | 19 | ## Important 20 | 1. For any code, no fallback silently. 21 | 2. If any exception happens, raise it specific Exception. if Exception doesn't exsits, create one. 22 | 2. Always try to implement in OOP way, which means more module, and more class so that future extension is easier. 23 | 3. try to model data and logic separately. Try to avoid directly using dict or list to hold data. 24 | 4. Always add type hints for functions and methods. 25 | 5. Always add docstrings for all classes, functions and methods. 26 | 6. Always add relevant tests for new features and logic. -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/prompt/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Ryoma AI Prompt System 3 | 4 | A modern, modular prompt management system for AI applications. 5 | 6 | Key Components: 7 | - PromptManager: High-level interface for prompt creation 8 | - PromptType, ExampleFormat, SelectorStrategy: Configuration enums 9 | - prompt_registry: Global registry for custom components 10 | 11 | Basic Usage: 12 | from ryoma_ai.prompt import prompt_manager, PromptType 13 | 14 | # Create a SQL prompt 15 | prompt = prompt_manager.create_sql_prompt( 16 | schema="CREATE TABLE users (id INT, name VARCHAR(100))", 17 | question="What are all the user names?" 
18 | ) 19 | 20 | # Create a chat prompt 21 | prompt = prompt_manager.create_chat_prompt( 22 | user_input="Hello, how can you help me?", 23 | context="You are a data analyst assistant" 24 | ) 25 | 26 | Advanced Usage: 27 | # Register custom templates 28 | prompt_manager.register_template( 29 | name="analysis_template", 30 | prompt_type=PromptType.INSTRUCTION_FOLLOWING, 31 | template_string="Analyze this data: {data}", 32 | description="Template for data analysis tasks" 33 | ) 34 | """ 35 | 36 | # Backward compatibility 37 | from ryoma_ai.prompt.base import BasePromptTemplate, BasicContextPromptTemplate 38 | 39 | # Base classes for extensions 40 | # Core components 41 | from ryoma_ai.prompt.core import ( 42 | ExampleFormat, 43 | ExampleFormatter, 44 | ExampleSelector, 45 | PromptBuilder, 46 | PromptConfig, 47 | PromptTemplate, 48 | PromptType, 49 | SelectorStrategy, 50 | prompt_registry, 51 | ) 52 | 53 | # Main interface 54 | from ryoma_ai.prompt.manager import prompt_manager 55 | 56 | __all__ = [ 57 | # Main interface 58 | "prompt_manager", 59 | # Configuration enums 60 | "PromptType", 61 | "ExampleFormat", 62 | "SelectorStrategy", 63 | # Core classes 64 | "PromptConfig", 65 | "PromptTemplate", 66 | "prompt_registry", 67 | # Extension points 68 | "PromptBuilder", 69 | "ExampleSelector", 70 | "ExampleFormatter", 71 | # Backward compatibility 72 | "BasePromptTemplate", 73 | "BasicContextPromptTemplate", 74 | ] 75 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/components/model_selector.py: -------------------------------------------------------------------------------- 1 | import reflex as rx 2 | from ryoma_lab.models.llm import ChatModelProvider, EmbeddingModelProvider 3 | 4 | 5 | def model_selector( 6 | model_provider, 7 | model_value, 8 | on_model_value_change, 9 | trigger_width: str = "12em", 10 | ) -> rx.Component: 11 | return rx.select.root( 12 | rx.select.trigger( 13 | placeholder="Select a 
def embedding_model_selector(
    model_value,
    on_model_value_change,
    trigger_width: str = "12em",
) -> rx.Component:
    """Render a dropdown for choosing an embedding model.

    Args:
        model_value: Currently selected model value.
        on_model_value_change: Handler invoked when the selection changes.
        trigger_width: CSS width of the select trigger.

    Returns:
        The embedding model selector component.
    """
    return model_selector(
        EmbeddingModelProvider,
        model_value,
        on_model_value_change,
        trigger_width,
    )


def chat_model_selector(
    model_value,
    on_model_value_change,
    trigger_width: str = "12em",
) -> rx.Component:
    """Render a dropdown for choosing a chat model.

    Args:
        model_value: Currently selected model value.
        on_model_value_change: Handler invoked when the selection changes.
        trigger_width: CSS width of the select trigger.

    Returns:
        The chat model selector component.
    """
    return model_selector(
        ChatModelProvider,
        model_value,
        on_model_value_change,
        trigger_width,
    )
Table Grouping] 17 | B --> B2[📏 Schema Size
Analysis] 18 | B --> B3[🎯 Representative
Selection] 19 | 20 | %% Format Restriction Details 21 | C --> C1[🔍 Format
Analysis] 22 | C --> C2[📋 Column
Specification] 23 | C --> C3[💡 Example
Generation] 24 | 25 | %% Column Exploration Details 26 | D --> D1[🔧 Exploration Query
Generation] 27 | D --> D2[▶️ Query
Execution] 28 | D --> D3[💎 Insight
Extraction] 29 | 30 | %% Parallel Generation Details 31 | E --> E1[🔀 Context
Variation] 32 | E --> E2[⚡ Parallel SQL
Generation] 33 | E --> E3[📦 Candidate
Collection] 34 | 35 | %% Self-Refinement Details 36 | F --> F1[🔍 Self-Consistency
Check] 37 | F --> F2[🛠️ Error
Correction] 38 | F --> F3[✨ Query
Refinement] 39 | 40 | %% Consensus Voting Details 41 | G --> G1[▶️ Result
Execution] 42 | G --> G2[⚖️ Result
Comparison] 43 | G --> G3[🗳️ Majority
Vote] 44 | 45 | %% Final Validation Details 46 | H --> H1[📊 Confidence
Scoring] 47 | H --> H2[✅ Final
Validation] 48 | H --> H3[📝 Response
Formatting] 49 | 50 | %% Styling 51 | classDef startEnd fill:#e3f2fd,stroke:#1976d2,stroke-width:3px,color:#000 52 | classDef mainProcess fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px,color:#000 53 | classDef subProcess fill:#e8f5e8,stroke:#388e3c,stroke-width:1px,color:#000 54 | classDef innovation fill:#fff8e1,stroke:#f57c00,stroke-width:2px,color:#000 55 | 56 | class A,I startEnd 57 | class B,C,D,E,F,G,H mainProcess 58 | class B1,B2,B3,C1,C2,C3,D1,D2,D3,E1,E2,E3,F1,F2,F3,G1,G2,G3,H1,H2,H3 subProcess 59 | -------------------------------------------------------------------------------- /.github/workflows/main_ryoma-demo.yml: -------------------------------------------------------------------------------- 1 | # Docs for the Azure Web Apps Deploy action: https://github.com/Azure/webapps-deploy 2 | # More GitHub Actions for Azure: https://github.com/Azure/actions 3 | # More info on Python, GitHub Actions, and Azure App Service: https://aka.ms/python-webapps-actions 4 | 5 | name: Build and deploy ryoma_ai api 6 | 7 | on: 8 | push: 9 | branches: 10 | - main 11 | paths: 12 | - 'Dockerfile' 13 | - 'ryoma_ai/**' 14 | - 'ryoma_ai-lab/**' 15 | - '.github/workflows/main_ryoma-demo.yml' 16 | workflow_dispatch: 17 | 18 | env: 19 | AZURE_WEBAPP_PACKAGE_PATH: "./" 20 | COMMIT_SHA: ${{ github.sha }} 21 | 22 | jobs: 23 | build: 24 | runs-on: ubuntu-latest 25 | environment: production 26 | 27 | defaults: 28 | run: 29 | working-directory: ${{ env.AZURE_WEBAPP_PACKAGE_PATH }} 30 | 31 | steps: 32 | - uses: actions/checkout@v4 33 | 34 | - name: Login to Azure Container Registry 35 | uses: docker/login-action@v3 36 | with: 37 | registry: ryoma.azurecr.io 38 | username: ryoma 39 | password: ${{ secrets.AZURE_CR_PASSWORD }} 40 | 41 | - name: docker build image 42 | run: | 43 | docker build --platform linux/amd64 ./ -t ryoma-api --build-arg APP_ENV=production 44 | 45 | - name: Set up Docker Build 46 | run: | 47 | docker tag ryoma-api ryoma.azurecr.io/ryoma-dataapp:${{ env.COMMIT_SHA }} 48 | 
49 | - name: Publish to Azure Container Registry 50 | run: | 51 | docker push ryoma.azurecr.io/ryoma-dataapp:${{ env.COMMIT_SHA }} 52 | 53 | # deploy: 54 | # runs-on: ubuntu-latest 55 | # needs: build 56 | # environment: production 57 | # 58 | # steps: 59 | # - name: Log in to Azure 60 | # uses: azure/login@v1 61 | # with: 62 | # creds: ${{ secrets.AZURE_CREDENTIALS }} 63 | # 64 | # - name: deploy Container App 65 | # uses: azure/container-apps-deploy-action@v2 66 | # with: 67 | # acrName: ryomaregistry 68 | # containerAppName: ryoma_ai-dataapp 69 | # resourceGroup: ryoma_ai 70 | # imageToDeploy: ryomaregistry.azurecr.io/ryoma_ai-dataapp:${{ env.COMMIT_SHA }} 71 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/pages/settings.py: -------------------------------------------------------------------------------- 1 | """The settings page.""" 2 | 3 | import reflex as rx 4 | from ryoma_lab.templates import ThemeState, template 5 | 6 | 7 | @template(route="/settings", title="Settings") 8 | def settings() -> rx.Component: 9 | """The settings page. 10 | 11 | Returns: 12 | The UI for the settings page. 
13 | """ 14 | return rx.vstack( 15 | rx.heading("Settings", size="8"), 16 | rx.hstack( 17 | rx.text("Dark mode: "), 18 | rx.color_mode.switch(), 19 | ), 20 | rx.hstack( 21 | rx.text("Primary color: "), 22 | rx.select( 23 | [ 24 | "tomato", 25 | "red", 26 | "ruby", 27 | "crimson", 28 | "pink", 29 | "plum", 30 | "purple", 31 | "violet", 32 | "iris", 33 | "indigo", 34 | "blue", 35 | "cyan", 36 | "teal", 37 | "jade", 38 | "green", 39 | "grass", 40 | "brown", 41 | "orange", 42 | "sky", 43 | "mint", 44 | "lime", 45 | "yellow", 46 | "amber", 47 | "gold", 48 | "bronze", 49 | "gray", 50 | ], 51 | value=ThemeState.accent_color, 52 | on_change=ThemeState.set_accent_color, 53 | ), 54 | ), 55 | rx.hstack( 56 | rx.text("Secondary color: "), 57 | rx.select( 58 | [ 59 | "gray", 60 | "mauve", 61 | "slate", 62 | "sage", 63 | "olive", 64 | "sand", 65 | ], 66 | value=ThemeState.gray_color, 67 | on_change=ThemeState.set_gray_color, 68 | ), 69 | ), 70 | rx.text( 71 | "You can edit this page in ", 72 | rx.code("{your_app}/pages/settings.py"), 73 | size="1", 74 | ), 75 | ) 76 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/states/base.py: -------------------------------------------------------------------------------- 1 | """Base state for the app.""" 2 | 3 | from typing import List, Optional 4 | 5 | import reflex as rx 6 | from fastapi_users.db import ( 7 | SQLAlchemyBaseOAuthAccountTableUUID, 8 | SQLAlchemyBaseUserTableUUID, 9 | ) 10 | from sqlalchemy import JSON, Column 11 | from sqlmodel import Field, Relationship, select 12 | 13 | 14 | class OAuthAccount(SQLAlchemyBaseOAuthAccountTableUUID, rx.Model, table=True): 15 | id: Optional[str] = Field(default=None, primary_key=True) 16 | 17 | user_id: Optional[str] = Field(default=None, foreign_key="user.id") 18 | user: "User" = Relationship(back_populates="oauth_accounts") 19 | 20 | 21 | class User(SQLAlchemyBaseUserTableUUID, rx.Model, table=True): 22 | id: Optional[str] = 
Field(default=None, primary_key=True) 23 | anonymous: bool = Field(default=True) 24 | username: str = Field(nullable=False, unique=True) 25 | name: str = Field(default="") 26 | display_name: str = Field(default="") 27 | initials: str = Field(nullable=True) 28 | color: str = Field(nullable=True) 29 | avatar_url: str = Field(nullable=True) 30 | workspace: str = Field(default="{}", nullable=False) 31 | settings: str = Field(default="{}", nullable=False) 32 | permissions: str = Field(sa_column=Column(JSON), default={}) 33 | oauth_accounts: List[OAuthAccount] = Relationship(back_populates="user") 34 | 35 | 36 | class BaseState(rx.State): 37 | """State for the app.""" 38 | 39 | user: Optional[User] = None 40 | 41 | sidebar_displayed: bool = False 42 | 43 | @rx.var 44 | def origin_url(self) -> str: 45 | """Get the url of the current page. 46 | 47 | Returns: 48 | str: The url of the current page. 49 | """ 50 | return self.router_data.get("asPath", "") 51 | 52 | def toggle_sidebar_displayed(self) -> None: 53 | """Toggle the sidebar_chat_history displayed.""" 54 | self.sidebar_displayed = not self.sidebar_displayed 55 | 56 | def load_user(self) -> None: 57 | """Load the user.""" 58 | with rx.session() as session: 59 | self.user = session.exec( 60 | select(User).where(User.username == "admin") 61 | ).first() 62 | 63 | def on_load(self) -> None: 64 | """Load the state.""" 65 | self.load_user() 66 | -------------------------------------------------------------------------------- /docs/source/architecture/multi-agent-routing-clean.mmd: -------------------------------------------------------------------------------- 1 | flowchart TD 2 | %% User Input 3 | User["User Input
Natural Language Question"] --> Router 4 | 5 | %% Smart Router 6 | Router["LLM-Based Smart Router
Intent Classification & Confidence Scoring"] 7 | 8 | %% Agent Selection 9 | Router --> SQL["SQL Agent
Database Operations"] 10 | Router --> Python["Python Agent
Code Execution"] 11 | Router --> Analysis["Data Analysis Agent
Statistics & Visualization"] 12 | Router --> Chat["Chat Agent
General Q&A"] 13 | 14 | %% Capabilities 15 | SQL --> SQLCap["Natural Language to SQL
Schema Exploration
Data Retrieval & Joins
Approval Workflow"] 16 | 17 | Python --> PyCap["Script Execution
Function Creation
Algorithm Implementation
Testing & Debugging"] 18 | 19 | Analysis --> AnalysisCap["Statistical Analysis
Data Visualization
Trend Analysis
Report Generation"] 20 | 21 | Chat --> ChatCap["Explanations & Help
Best Practices
Conceptual Discussions
Information Retrieval"] 22 | 23 | %% Data Integration 24 | DataStore[("Multi-Database Support
PostgreSQL, MySQL, SQLite
DuckDB, Iceberg")] 25 | 26 | %% Vector Store 27 | VectorStore[("Semantic Search
Catalog Indexing
Optimized Performance")] 28 | 29 | %% Connections 30 | SQL -.->|Query| DataStore 31 | Analysis -.->|Analyze| DataStore 32 | SQL -.->|Search| VectorStore 33 | 34 | %% Output 35 | SQLCap --> Output["Intelligent Response
Context-Aware Results"] 36 | PyCap --> Output 37 | AnalysisCap --> Output 38 | ChatCap --> Output 39 | 40 | Output --> User 41 | 42 | %% Styling 43 | classDef userStyle fill:#e3f2fd,stroke:#1976d2,stroke-width:3px,color:#000 44 | classDef routerStyle fill:#f3e5f5,stroke:#7b1fa2,stroke-width:3px,color:#000 45 | classDef agentStyle fill:#e8f5e8,stroke:#388e3c,stroke-width:3px,color:#000 46 | classDef dataStyle fill:#fff8e1,stroke:#f57c00,stroke-width:2px,color:#000 47 | classDef outputStyle fill:#fce4ec,stroke:#c2185b,stroke-width:3px,color:#000 48 | 49 | class User userStyle 50 | class Router routerStyle 51 | class SQL,Python,Analysis,Chat agentStyle 52 | class DataStore,VectorStore dataStyle 53 | class Output outputStyle -------------------------------------------------------------------------------- /alembic/env.py: -------------------------------------------------------------------------------- 1 | from logging.config import fileConfig 2 | 3 | from sqlalchemy import engine_from_config, pool 4 | 5 | from alembic import context 6 | 7 | # this is the Alembic Config object, which provides 8 | # access to the values within the .ini file in use. 9 | config = context.config 10 | 11 | # Interpret the config file for Python logging. 12 | # This line sets up loggers basically. 13 | if config.config_file_name is not None: 14 | fileConfig(config.config_file_name) 15 | 16 | # add your model's MetaData object here 17 | # for 'autogenerate' support 18 | # from myapp import mymodel 19 | # target_metadata = mymodel.Base.metadata 20 | target_metadata = None 21 | 22 | # other values from the config, defined by the needs of env.py, 23 | # can be acquired: 24 | # my_important_option = config.get_main_option("my_important_option") 25 | # ... etc. 26 | 27 | 28 | def run_migrations_offline() -> None: 29 | """Run migrations in 'offline' mode. 30 | 31 | This configures the context with just a URL 32 | and not an Engine, though an Engine is acceptable 33 | here as well. 
By skipping the Engine creation 34 | we don't even need a DBAPI to be available. 35 | 36 | Calls to context.execute() here emit the given string to the 37 | script output. 38 | 39 | """ 40 | url = config.get_main_option("sqlalchemy.url") 41 | context.configure( 42 | url=url, 43 | target_metadata=target_metadata, 44 | literal_binds=True, 45 | dialect_opts={"paramstyle": "named"}, 46 | ) 47 | 48 | with context.begin_transaction(): 49 | context.run_migrations() 50 | 51 | 52 | def run_migrations_online() -> None: 53 | """Run migrations in 'online' mode. 54 | 55 | In this scenario we need to create an Engine 56 | and associate a connection with the context. 57 | 58 | """ 59 | connectable = engine_from_config( 60 | config.get_section(config.config_ini_section, {}), 61 | prefix="sqlalchemy.", 62 | poolclass=pool.NullPool, 63 | ) 64 | 65 | with connectable.connect() as connection: 66 | context.configure(connection=connection, target_metadata=target_metadata) 67 | 68 | with context.begin_transaction(): 69 | context.run_migrations() 70 | 71 | 72 | if context.is_offline_mode(): 73 | run_migrations_offline() 74 | else: 75 | run_migrations_online() 76 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/models/data_catalog.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | from typing import List, Optional 3 | 4 | import reflex as rx 5 | from sqlmodel import Field, Relationship 6 | 7 | 8 | class CatalogTable(rx.Model, table=True): 9 | """The Catalog Table Model.""" 10 | 11 | __tablename__ = "catalog" 12 | 13 | id: str = Field( 14 | default_factory=lambda: str(uuid.uuid4()), primary_key=True, max_length=36 15 | ) 16 | datasource: Optional[str] = Field(None, description="Name of the datasource") 17 | catalog_name: str = Field( 18 | ..., description="Name of the catalog, also known as the database name" 19 | ) 20 | 21 | schemas: List["SchemaTable"] = 
Relationship(back_populates="catalog") 22 | 23 | 24 | class SchemaTable(rx.Model, table=True): 25 | """The Schema Model.""" 26 | 27 | __tablename__ = "schema" 28 | 29 | id: str = Field( 30 | default_factory=lambda: str(uuid.uuid4()), primary_key=True, max_length=36 31 | ) 32 | schema_name: str 33 | tables: List["TableTable"] = Relationship(back_populates="schema") 34 | 35 | catalog_id: Optional[str] = Field(default=None, foreign_key="catalog.id") 36 | catalog: Optional[CatalogTable] = Relationship(back_populates="schemas") 37 | 38 | 39 | class TableTable(rx.Model, table=True): 40 | """The Table Model.""" 41 | 42 | __tablename__ = "table" 43 | 44 | id: str = Field( 45 | default_factory=lambda: str(uuid.uuid4()), primary_key=True, max_length=36 46 | ) 47 | table_name: str 48 | description: Optional[str] = None 49 | is_view: Optional[bool] = False 50 | attrs: Optional[str] = None 51 | columns: List["ColumnTable"] = Relationship(back_populates="table") 52 | 53 | schema_id: Optional[str] = Field(default=None, foreign_key="schema.id") 54 | schema: Optional[SchemaTable] = Relationship(back_populates="tables") 55 | 56 | 57 | class ColumnTable(rx.Model, table=True): 58 | """The Column Model.""" 59 | 60 | __tablename__ = "column" 61 | 62 | id: str = Field( 63 | default_factory=lambda: str(uuid.uuid4()), primary_key=True, max_length=36 64 | ) 65 | name: str 66 | type: str 67 | description: Optional[str] = None 68 | 69 | table_id: Optional[str] = Field(default=None, foreign_key="table.id") 70 | table: Optional[TableTable] = Relationship(back_populates="columns") 71 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/agent/spark_agent.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from ryoma_ai.agent.workflow import WorkflowAgent 3 | from ryoma_ai.tool.python_tool import PythonTool 4 | from ryoma_ai.tool.spark_tool import ConvertPandasToSparkTool, 
SparkTool 5 | 6 | 7 | class SparkAgent(WorkflowAgent): 8 | description: str = ( 9 | "A PySpark agent that can use PySpark tools to run PySpark scripts." 10 | ) 11 | 12 | def __init__( 13 | self, spark_configs: dict[str, str], model: str, model_parameters=None 14 | ): 15 | self.spark_session = None 16 | self.init_session(spark_configs) 17 | super().__init__( 18 | [ 19 | SparkTool(), 20 | ConvertPandasToSparkTool(), 21 | ], 22 | model, 23 | model_parameters, 24 | ) 25 | for tool in self.tools: 26 | if isinstance(tool, PythonTool): 27 | tool.update_script_context( 28 | script_context={"spark_session": self.spark_session} 29 | ) 30 | 31 | def init_session(self, spark_configs: dict[str, str]): 32 | self.spark_session = self.create_spark_session(spark_configs) 33 | self.spark_session.conf.set("spark.sql.execution.arrow.enabled", "true") 34 | 35 | @staticmethod 36 | def create_spark_session(spark_configs: dict[str, str]): 37 | assert "master" in spark_configs, "master is required in spark_configs" 38 | assert "app_name" in spark_configs, "app_name is required in spark_configs" 39 | 40 | # TODO refactor to use ibis spark backend 41 | import findspark 42 | from pyspark.sql import SparkSession 43 | 44 | findspark.init() 45 | 46 | return ( 47 | SparkSession.builder.master(spark_configs.get("master")) 48 | .appName(spark_configs.get("app_name")) 49 | .getOrCreate() 50 | ) 51 | 52 | def add_pandas_dataframe(self, dataframe: pd.DataFrame): 53 | df_id = f"df_{id(dataframe)}" 54 | self.add_prompt( 55 | f""" 56 | dataframe name: {df_id} 57 | dataframe metadata: {dataframe.info} 58 | """ 59 | ) 60 | for tool in self.tools: 61 | if isinstance(tool, PythonTool): 62 | tool.update_script_context(script_context={df_id: dataframe}) 63 | return self 64 | -------------------------------------------------------------------------------- /tests/unit_tests/test_tool.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import Mock, patch 2 | 
3 | import pandas as pd 4 | import pytest 5 | from pyspark.sql import SparkSession 6 | from ryoma_ai.models.sql import QueryStatus 7 | from ryoma_ai.tool.pandas_tool import PandasTool 8 | from ryoma_ai.tool.spark_tool import SparkTool 9 | from ryoma_ai.tool.sql_tool import SqlQueryTool 10 | 11 | from tests.unit_tests.test_datasource import MockSqlDataSource 12 | 13 | 14 | @pytest.fixture 15 | def pandas_dataframe(): 16 | df = pd.DataFrame( 17 | { 18 | "year": [2020, 2022, 2019, 2021], 19 | "n_legs": [2, 4, 5, 100], 20 | "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], 21 | } 22 | ) 23 | return df 24 | 25 | 26 | @pytest.fixture 27 | def mock_sql_data_source(): 28 | data_source = MockSqlDataSource() 29 | return data_source 30 | 31 | 32 | @pytest.fixture 33 | def pyspark_session(): 34 | return SparkSession.builder.appName("pytest").getOrCreate() 35 | 36 | 37 | def test_pyspark_tool(pyspark_session, pandas_dataframe): 38 | pyspark_tool = SparkTool() 39 | pyspark_tool.update_script_context( 40 | {"spark_session": pyspark_session, "df": pandas_dataframe} 41 | ) 42 | script = """ 43 | spark_session.createDataFrame(df).show() 44 | """ 45 | result = pyspark_tool._run(script) 46 | assert result.success is True 47 | 48 | 49 | def test_sql_tool(mock_sql_data_source): 50 | with patch("ryoma_ai.datasource.sql.SqlDataSource.query") as mock_execute: 51 | mock_execute.return_value = "success" 52 | 53 | # Mock the store to return the datasource 54 | mock_store = Mock() 55 | mock_store.get.return_value = Mock(value=mock_sql_data_source) 56 | 57 | sql_tool = SqlQueryTool() 58 | query = "SELECT * FROM customers LIMIT 4" 59 | result = sql_tool._run(query, mock_store) 60 | assert result.data == "success" 61 | assert result.status == QueryStatus.SUCCESS 62 | 63 | 64 | def test_pandas_tool(pandas_dataframe): 65 | pandas_tool = PandasTool() 66 | pandas_tool.update_script_context({"df": pandas_dataframe}) 67 | script = """ 68 | df["year"] = df["year"] + 1 69 | df 70 | """ 71 | 
result = pandas_tool._run(script) 72 | assert result.success is True 73 | assert result.result["year"].tolist() == [2021, 2023, 2020, 2022] 74 | -------------------------------------------------------------------------------- /packages/ryoma_ai/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling>=1.4.0"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "ryoma_ai" 7 | version = "0.1.5" 8 | description = "AI Powered Data Platform" 9 | readme = "README.md" 10 | repository = "https://github.com/project-ryoma/ryoma" 11 | homepage = "https://www.ryoma-ai.com" 12 | requires-python = ">=3.9" 13 | 14 | keywords = ["Artificial Intelligence", "Data Analysis", "Code Generation"] 15 | 16 | classifiers = [ 17 | "Development Status :: 3 - Alpha", 18 | "Intended Audience :: Developers", 19 | "Operating System :: OS Independent", 20 | "Topic :: Software Development :: Libraries :: Python Modules", 21 | "License :: OSI Approved :: Apache Software License", 22 | "Programming Language :: Python :: 3", 23 | "Programming Language :: Python :: 3.9", 24 | "Programming Language :: Python :: 3.10", 25 | ] 26 | 27 | dependencies = [ 28 | "mock>=5.1.0", 29 | "openai>=1.33.0", 30 | "pydantic>=2.7.1", 31 | "pandas>=2.2.2", 32 | "langchain-openai>=0.1.7", 33 | "langchain>=0.2.9, <0.3.0", 34 | "langgraph>=0.2.0", 35 | "ipython>=8.14.0", 36 | "pyarrow>=16.1.0", 37 | "typer>=0.12.3", 38 | "types-setuptools>=70.0.0.20240524", 39 | "datasketch>=1.6.5", 40 | "httpx==0.27.2", 41 | "click>=8.0.0", 42 | "rich>=13.0.0", 43 | "prompt-toolkit>=3.0.0", 44 | "amundsen-databuilder>=7.5.0", 45 | "ibis-framework>=9.0.0", 46 | ] 47 | 48 | [project.scripts] 49 | ryoma_ai = "ryoma_ai.cli.main:main" 50 | 51 | [project.optional-dependencies] 52 | snowflake = [ 53 | "ibis-framework[snowflake]>=9.0.0", 54 | "amundsen-databuilder[snowflake]>=7.5.0", 55 | ] 56 | sqlite = [ 57 | "ibis-framework[sqlite]>=9.0.0", 58 
| ] 59 | postgres = [ 60 | "psycopg2>=2.9.2", 61 | "ibis-framework[postgres]>=9.0.0", 62 | ] 63 | mysql = [ 64 | "ibis-framework[mysql]>=9.0.0", 65 | "amundsen-databuilder[rds]>=7.5.0", 66 | ] 67 | bigquery = [ 68 | "ibis-framework[bigquery]>=9.0.0", 69 | "amundsen-databuilder[bigquery]>=7.5.0", 70 | ] 71 | pyspark = [ 72 | "ibis-framework[pyspark]>=9.0.0", 73 | "pyspark>=3.2.0", 74 | "findspark>=1.4.2", 75 | ] 76 | duckdb = [ 77 | "duckdb>=1.0.0", 78 | "ibis-framework[duckdb]>=9.0.0" 79 | ] 80 | dynamodb = [ 81 | "boto3>=1.28.0", 82 | "aioboto3>=11.0.0" 83 | ] 84 | iceberg = [ 85 | "pyiceberg>=0.5.0", 86 | "pyarrow>=14.0.0" 87 | ] 88 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/components/react_rnd.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional 2 | 3 | import reflex as rx 4 | 5 | 6 | class DraggableData(rx.Base): 7 | x: int 8 | y: int 9 | deltaX: int 10 | deltaY: int 11 | lastX: int 12 | lastY: int 13 | 14 | 15 | class RnD(rx.Component): 16 | library = "react-rnd" 17 | tag = "Rnd" 18 | 19 | # Props 20 | default: rx.Var[Dict[str, Any]] 21 | position: rx.Var[Dict[str, int]] 22 | size: rx.Var[Dict[str, int]] 23 | bounds: rx.Var[str] 24 | min_width: rx.Var[int] 25 | min_height: rx.Var[int] 26 | max_width: rx.Var[int] 27 | max_height: rx.Var[int] 28 | drag_grid: rx.Var[tuple[int, int]] 29 | resize_grid: rx.Var[tuple[int, int]] 30 | lockAspectRatio: rx.Var[bool] 31 | enable_user_select_hack: rx.Var[bool] 32 | disable_dragging: rx.Var[bool] 33 | enable: rx.Var[Dict[str, bool]] 34 | 35 | # Event handlers 36 | on_drag_start: rx.Var[Optional[rx.EventHandler]] 37 | on_drag: rx.Var[Optional[rx.EventHandler]] 38 | on_drag_stop: rx.Var[Optional[rx.EventHandler]] 39 | on_resize_start: rx.Var[Optional[rx.EventHandler]] 40 | on_resize: rx.Var[Optional[rx.EventHandler]] 41 | on_resize_stop: rx.Var[Optional[rx.EventHandler]] 42 | 43 | 
def get_event_triggers(self) -> Dict[str, Any]: 44 | """Get event triggers.""" 45 | 46 | def drag_signature(e0, data: DraggableData): 47 | """Get the drag signature.""" 48 | return [ 49 | data.x, 50 | data.y, 51 | data.deltaX, 52 | data.deltaY, 53 | data.lastX, 54 | data.lastY, 55 | ] 56 | 57 | def resize_signature(e0, direction, ref, delta, position): 58 | """Get the resize signature.""" 59 | return [ 60 | direction, 61 | delta.width, 62 | delta.height, 63 | position.x, 64 | position.y, 65 | ] 66 | 67 | return { 68 | **super().get_event_triggers(), 69 | "on_drag_start": drag_signature, 70 | "on_drag": drag_signature, 71 | "on_drag_stop": drag_signature, 72 | "on_resize_start": resize_signature, 73 | "on_resize": resize_signature, 74 | "on_resize_stop": resize_signature, 75 | } 76 | 77 | 78 | rnd = RnD.create 79 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling>=1.4.0"] 3 | build-backend = "hatchling.build" 4 | 5 | [tool.hatch.build.targets.wheel] 6 | packages = ["src/foo"] 7 | 8 | 9 | [project] 10 | name = "ryoma-dev" 11 | version = "0.0.1" 12 | description = "Root project for Ryoma AI Platform" 13 | license = {file = "LICENSE"} 14 | readme = "README.md" 15 | requires-python = ">=3.10" 16 | 17 | dependencies = [ 18 | "ryoma_ai[snowflake, pyspark, sqlite, mysql, bigquery]", 19 | "ryoma_lab", 20 | "pytest >= 6.2.5", 21 | "black >= 23.3.0", 22 | "isort >= 5.9.3", 23 | "mypy >= 0.910", 24 | "flake8 >= 3.9.2", 25 | "click >= 8.0.0", 26 | "pip >= 21.3.1", 27 | "setuptools >= 58.0.4", 28 | "pyupgrade >= 2.26.0", 29 | "pytest-cov>=5.0.0", 30 | "openai-responses>=0.10.0", 31 | "coverage-badge>=1.1.2", 32 | "reflex-chakra>=0.7.0", 33 | ] 34 | 35 | [tool.uv.sources] 36 | ryoma_ai = { workspace = true} 37 | ryoma_lab = { workspace = true} 38 | 39 | [tool.uv.workspace] 40 | members = 
["packages/ryoma_ai", "packages/ryoma_lab"] 41 | 42 | [tool.isort] 43 | profile = "black" 44 | 45 | 46 | [tool.mypy] 47 | allow_redefinition = false 48 | check_untyped_defs = true 49 | disallow_any_generics = true 50 | disallow_incomplete_defs = true 51 | ignore_missing_imports = true 52 | implicit_reexport = false 53 | no_implicit_optional = true 54 | show_column_numbers = true 55 | show_error_codes = true 56 | show_error_context = true 57 | strict_equality = true 58 | strict_optional = true 59 | warn_no_return = true 60 | warn_redundant_casts = true 61 | warn_return_any = true 62 | warn_unreachable = true 63 | warn_unused_configs = true 64 | warn_unused_ignores = true 65 | explicit_package_bases = true 66 | 67 | 68 | [tool.pytest.ini_options] 69 | norecursedirs = ["hooks", "*.egg", ".eggs", "dist", "build", "docs", ".tox", ".git", "__pycache__"] 70 | doctest_optionflags = ["NUMBER", "NORMALIZE_WHITESPACE", "IGNORE_EXCEPTION_DETAIL"] 71 | addopts = [ 72 | "--strict-markers", 73 | "--tb=short", 74 | "--doctest-modules", 75 | "--doctest-continue-on-failure", 76 | ] 77 | 78 | [tool.coverage.run] 79 | source = ["tests"] 80 | 81 | [coverage.paths] 82 | source = "ryoma" 83 | 84 | [coverage.run] 85 | branch = true 86 | 87 | [coverage.report] 88 | fail_under = 50 89 | show_missing = true 90 | 91 | [dependency-groups] 92 | dev = [ 93 | "mypy>=1.11.2", 94 | "pytest>=8.3.3", 95 | "pytest-cov>=5.0.0", 96 | "ruff>=0.12.10", 97 | ] 98 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/styles.py: -------------------------------------------------------------------------------- 1 | """Styles for the app.""" 2 | 3 | import reflex as rx 4 | 5 | global_style = { 6 | ".gridjs-container": { 7 | "font_size": "12px", 8 | } 9 | } 10 | border_radius = "0.375rem" 11 | border = f"1px solid {rx.color('gray', 6)}" 12 | text_color = rx.color("gray", 11) 13 | accent_text_color = rx.color("accent", 10) 14 | accent_color = 
rx.color("accent", 1) 15 | hover_accent_color = {"_hover": {"color": accent_text_color}} 16 | hover_accent_bg = {"_hover": {"background_color": accent_color}} 17 | content_width_vw = "90vw" 18 | sidebar_width = "18em" 19 | 20 | template_page_style = { 21 | "padding_top": "5em", 22 | "padding_x": ["auto", "0.5em"], 23 | "flex": "1", 24 | "overflow_x": "hidden", 25 | } 26 | 27 | template_content_style = { 28 | "border_radius": border_radius, 29 | "margin_bottom": "2em", 30 | "min_height": "90vh", 31 | } 32 | 33 | link_style = { 34 | "color": accent_text_color, 35 | "text_decoration": "none", 36 | **hover_accent_color, 37 | } 38 | 39 | overlapping_button_style = { 40 | "background_color": "white", 41 | "border_radius": border_radius, 42 | } 43 | 44 | markdown_style = { 45 | "h1": lambda text: rx.heading(text, size="5", margin_y="1em"), 46 | "h2": lambda text: rx.heading(text, size="3", margin_y="1em"), 47 | "h3": lambda text: rx.heading(text, size="1", margin_y="1em"), 48 | "p": lambda text: rx.text(text, color="black", margin_y="1em"), 49 | "code": lambda text: rx.code(text, color_scheme="gray"), 50 | "codeblock": lambda text, **props: rx.code_block(text, **props, margin_y="1em"), 51 | "a": lambda text, **props: rx.link( 52 | text, 53 | **props, 54 | color="blue", 55 | _hover={"color": "red"}, 56 | font_weight="bold", 57 | text_decoration="underline", 58 | text_decoration_color=accent_text_color, 59 | ), 60 | "table": lambda el: rx.table.root(el, size="1", width="40em"), 61 | "thead": lambda el: rx.table.header(el, border_bottom=border), 62 | "tr": lambda text: rx.table.row(text, border_bottom=border), 63 | } 64 | 65 | # Common styles for questions and answers. 
66 | shadow = "rgba(0, 0, 0, 0.15) 0px 2px 8px" 67 | chat_margin = "20%" 68 | message_style = dict( 69 | border_radius="5px", 70 | box_shadow=shadow, 71 | display="inline-block", 72 | margin_y="0.5em", 73 | padding_left="1em", 74 | padding_right="1em", 75 | max_width="44em", 76 | ) 77 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/services/kernel/sqlkernel.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import re 3 | from typing import Any, Dict 4 | 5 | from ryoma_ai.datasource.factory import DataSourceFactory 6 | from ryoma_ai.datasource.sql import SqlDataSource 7 | from ryoma_lab.services.kernel.base import BaseKernel 8 | from sqlalchemy.exc import SQLAlchemyError 9 | 10 | 11 | class SqlKernel(BaseKernel): 12 | datasource: SqlDataSource 13 | 14 | def __init__(self, datasource: SqlDataSource, **kwargs): 15 | if not datasource: 16 | datasource = DataSourceFactory.create_datasource("duckdb") 17 | super().__init__(datasource, **kwargs) 18 | 19 | def execute(self, query: str) -> Dict[str, Any]: 20 | logging.info(f"Executing SQL query: {query}") 21 | 22 | try: 23 | df = self.datasource.query(query) 24 | return { 25 | "output_type": "dataframe", 26 | "data": df, 27 | } 28 | except SQLAlchemyError as e: 29 | logging.error(f"SQLAlchemy error: {str(e)}") 30 | return self._create_error_response(e) 31 | except Exception as e: 32 | logging.error(f"Unexpected error: {str(e)}") 33 | return self._create_error_response(e) 34 | 35 | def _extract_datasource_from_query(self, query: str) -> str: 36 | # This regex looks for table names in common SQL patterns 37 | pattern = r'\bFROM\s+"?(\w+)"?|\bJOIN\s+"?(\w+)"?' 
38 | matches = re.findall(pattern, query, re.IGNORECASE) 39 | # Flatten and filter the matches 40 | datasources = [ds for match in matches for ds in match if ds] 41 | return datasources[0] if datasources else None 42 | 43 | def _get_datasource(self, name: str) -> SqlDataSource: 44 | datasource = self.datasources.get(name) 45 | if datasource: 46 | logging.info(f"Found type: {name}") 47 | else: 48 | logging.warning(f"Datasource not found: {name}") 49 | return datasource 50 | 51 | def _remove_datasource_from_query(self, query: str, datasource_name: str) -> str: 52 | # Remove the type name from the query 53 | pattern = r"\b" + re.escape(datasource_name) + r"\." 54 | return re.sub(pattern, "", query, flags=re.IGNORECASE) 55 | 56 | def set_datasources(self, datasources: Dict[str, SqlDataSource]): 57 | self.datasources = datasources 58 | logging.info(f"Updated datasources: {list(self.datasources.keys())}") 59 | -------------------------------------------------------------------------------- /tests/e2e/ryoma_ai/test_agent.py: -------------------------------------------------------------------------------- 1 | """ 2 | End-to-end tests for agents. 3 | 4 | These tests can use either: 5 | 1. OpenAI API (requires OPENAI_API_KEY environment variable) 6 | 2. 
GPT4All local model (requires gpt4all package and will download model on first run) 7 | """ 8 | 9 | import os 10 | 11 | import pytest 12 | from ryoma_ai.agent.chat_agent import ChatAgent 13 | from ryoma_ai.agent.sql import SqlAgent 14 | 15 | # Determine which backend to use 16 | USE_OPENAI = ( 17 | os.environ.get("OPENAI_API_KEY") 18 | and os.environ.get("USE_OPENAI_FOR_TESTS", "").lower() == "true" 19 | ) 20 | USE_GPT4ALL = ( 21 | not USE_OPENAI and os.environ.get("USE_GPT4ALL_FOR_TESTS", "").lower() == "true" 22 | ) 23 | 24 | if USE_GPT4ALL: 25 | # Only import if we're using it 26 | pytest.importorskip("gpt4all") 27 | MODEL = "gpt4all:Llama-3.2-1B-Instruct-Q4_0.gguf" 28 | elif USE_OPENAI: 29 | MODEL = "gpt-3.5-turbo" 30 | else: 31 | # Skip all tests if no backend is configured 32 | pytest.skip( 33 | "No LLM backend configured. Set USE_OPENAI_FOR_TESTS=true or USE_GPT4ALL_FOR_TESTS=true", 34 | allow_module_level=True, 35 | ) 36 | 37 | 38 | def test_base_agent(): 39 | """Test ChatAgent with configured model.""" 40 | ryoma_agent = ChatAgent(MODEL) 41 | 42 | # Test with a simple query - disable display to capture results 43 | result = ryoma_agent.stream("What is 2 + 2?", display=False) 44 | assert result is not None 45 | 46 | # Collect streamed results 47 | responses = list(result) 48 | assert len(responses) > 0 49 | 50 | # Check that we got some response content 51 | response_text = "".join( 52 | str(r.content if hasattr(r, "content") else str(r)) for r in responses 53 | ) 54 | assert len(response_text) > 0 55 | 56 | 57 | def test_workflow_agent(): 58 | """Test SqlAgent with configured model.""" 59 | ryoma_agent = SqlAgent(MODEL) 60 | 61 | # Test with a simple SQL-related query - disable display to capture results 62 | result = ryoma_agent.stream( 63 | "Show me a simple SQL query to select all records from a table", display=False 64 | ) 65 | assert result is not None 66 | 67 | # Collect streamed results 68 | responses = list(result) 69 | assert len(responses) > 0 70 
class DuckDBDataSource(SqlDataSource):
    """SQL datasource backed by a DuckDB database (file-based or in-memory).

    Connections honor ``read_only``, an optional temp directory, extra DuckDB
    configuration options, and a list of extensions loaded on each connect.
    """

    def __init__(
        self,
        database: str = ":memory:",
        read_only: bool = False,
        temp_directory: Optional[str] = None,
        extensions: Optional[list] = None,
        config: Optional[dict] = None,
        **kwargs,
    ):
        """Create the datasource.

        Args:
            database: DuckDB database file path, or ``:memory:`` for in-memory.
            read_only: Open the database in read-only mode.
            temp_directory: Directory for DuckDB temp files (folded into config).
            extensions: DuckDB extensions to load on every new connection.
            config: Extra DuckDB configuration options.
            **kwargs: Forwarded to ``SqlDataSource``.
        """
        super().__init__(database=database, **kwargs)
        self.read_only = read_only
        # Copy so we never mutate a dict owned by the caller when adding
        # temp_directory below.
        self.config = dict(config) if config else {}
        if temp_directory:
            self.config["temp_directory"] = temp_directory
        self.extensions = extensions

    def get_query_plan(self, query: str) -> Any:
        """Return DuckDB's plan for *query* as the rows of ``EXPLAIN`` output."""
        conn = self.connect()
        return conn.sql(f"EXPLAIN {query}").fetchall()

    def crawl_catalog(self, **kwargs):
        # Catalog crawling is not implemented for DuckDB; intentionally a no-op.
        pass

    def query(self, query, result_format="pandas", **kwargs) -> Any:
        """Run *query* and return the result as a pandas DataFrame.

        Keyword arguments are registered as named views so the SQL can refer
        to in-memory objects, e.g. ``query("select * from t", t=df)``.
        """
        conn = self.connect()
        # Register kwargs as views. The previous implementation wrote them into
        # inspect.currentframe().f_locals, but f_locals updates on function
        # frames are not guaranteed to persist, so the objects were never
        # reliably visible to DuckDB's replacement scans.
        for view_name, value in kwargs.items():
            conn.register(view_name, value)
        return conn.sql(query).fetchdf()

    def register(self, name: str, data: Any, **kwargs):
        """Register *data* (e.g. a pandas DataFrame) as a view named *name*."""
        conn = self.connect()
        conn.register(name, data)

    def _connect(self, **kwargs) -> Any:
        """Open a DuckDB connection and load any configured extensions."""
        conn = duckdb.connect(
            database=self.database,
            read_only=self.read_only,
            config=self.config,
        )
        if self.extensions:
            for extension in self.extensions:
                conn.load_extension(extension)
        return conn
class KernelNode(ToolNode):
    """A ToolNode that delegates tool execution to a custom executor callable.

    The executor is invoked as ``executor(tool, tool_input, config)`` (and
    awaited in the async path); its result becomes the ToolMessage content.
    Sync and async paths share the helpers below so the two stay in lockstep.
    """

    def __init__(
        self,
        tools: Sequence[Union[BaseTool, Callable]],
        executor: Callable,
        *,
        name: str = "kernel_tools",
        tags: Optional[List[str]] = None,
        handle_tool_errors: bool = True,
    ):
        """Wrap *tools* in a ToolNode that runs them through *executor*."""
        super().__init__(
            tools, name=name, tags=tags, handle_tool_errors=handle_tool_errors
        )
        self.executor = executor

    @staticmethod
    def _tool_input(call: ToolCall) -> dict:
        # The executor receives the tool call augmented with its message type.
        return {**call, "type": "tool_call"}

    @staticmethod
    def _success_message(call: ToolCall, result: Any) -> ToolMessage:
        # Stringify the executor result into the tool's reply message.
        return ToolMessage(
            content=str(result), name=call["name"], tool_call_id=call["id"]
        )

    @staticmethod
    def _error_message(call: ToolCall, error: Exception) -> ToolMessage:
        # Surface the failure to the model so it can self-correct.
        content = f"Error: {repr(error)}\n Please fix your mistakes."
        return ToolMessage(content, name=call["name"], tool_call_id=call["id"])

    def _run_one(self, call: ToolCall, config: RunnableConfig) -> ToolMessage:
        """Run a single tool call synchronously through the executor."""
        if invalid_tool_message := self._validate_tool_call(call):
            return invalid_tool_message

        try:
            tool = self.tools_by_name[call["name"]]
            result = self.executor(tool, self._tool_input(call), config)
            return self._success_message(call, result)
        except Exception as e:
            if not self.handle_tool_errors:
                raise
            return self._error_message(call, e)

    async def _arun_one(self, call: ToolCall, config: RunnableConfig) -> ToolMessage:
        """Async twin of ``_run_one``; awaits the executor."""
        if invalid_tool_message := self._validate_tool_call(call):
            return invalid_tool_message

        try:
            tool = self.tools_by_name[call["name"]]
            result = await self.executor(tool, self._tool_input(call), config)
            return self._success_message(call, result)
        except Exception as e:
            if not self.handle_tool_errors:
                raise
            return self._error_message(call, e)
class TransformerClassifier(nn.Module):
    """Decoder-only transformer classifier.

    Embeds token ids, runs them through a causally-masked TransformerDecoder
    (self-attending over the same sequence), and classifies from the final
    token's hidden state. Returns raw logits suitable for CrossEntropyLoss.
    """

    def __init__(self, config: Dict[str, Any], num_labels: int):
        """
        Args:
            config: Requires ``vocab_size``, ``hidden_size``,
                ``num_attention_heads`` and ``num_hidden_layers``.
            num_labels: Number of output classes.
        """
        super().__init__()
        self.config = config
        self.num_labels = num_labels

        self.embd = nn.Embedding(
            num_embeddings=config["vocab_size"],
            embedding_dim=config["hidden_size"],
        )

        # batch_first=True so inputs/outputs are (batch, seq, hidden). The
        # previous default (seq-first) interpreted dim 0 as the sequence axis,
        # so the (seq_len, seq_len) causal mask and the x[:, -1, :] last-token
        # selection were both wrong whenever batch size != sequence length.
        self.decoder_layer = nn.TransformerDecoderLayer(
            d_model=config["hidden_size"],
            nhead=config["num_attention_heads"],
            batch_first=True,
        )

        self.decoder = nn.TransformerDecoder(
            decoder_layer=self.decoder_layer,
            num_layers=config["num_hidden_layers"],
        )

        self.classifier = nn.Linear(config["hidden_size"], num_labels)

    def forward(self, input_ids: torch.Tensor) -> torch.Tensor:
        """Return classification logits of shape (batch, num_labels).

        Raw logits (not softmax probabilities) are returned because
        ``nn.CrossEntropyLoss`` applies log-softmax internally; the previous
        version's trailing softmax double-applied it and hurt training, while
        argmax-based prediction is unaffected by dropping it.
        """
        embedded_seq = self.embd(input_ids)

        # Causal mask over the target sequence dimension.
        tgt_mask = nn.Transformer.generate_square_subsequent_mask(input_ids.size(1)).to(
            input_ids.device
        )
        # NOTE(review): cross-attention over the same sequence (memory) is
        # unmasked, as in the original — confirm whether memory_mask should
        # also be causal for strict decoder-only behavior.
        x = self.decoder(embedded_seq, embedded_seq, tgt_mask=tgt_mask)
        return self.classifier(x[:, -1, :])
def render_output_item(
    item: Union[
        CellOutput,
        StreamOutput,
        ExecuteResultOutput,
        DataframeOutput,
        ErrorOutput,
        UnknownOutput,
    ],
) -> rx.Component:
    """Render a single cell output, dispatching on ``item.output_type``.

    Dispatch is a chain of nested ``rx.cond`` components rather than a Python
    if/elif — presumably because ``output_type`` is a reactive Var resolved
    client-side; verify against Reflex's Var semantics before restructuring.
    Unrecognized types fall through to a plain "Unknown output type" text.
    """
    return rx.box(
        rx.cond(
            item.output_type == "stream",
            render_stream_output(item),
            rx.cond(
                item.output_type == "execute_result",
                render_execute_result(item),
                rx.cond(
                    item.output_type == "dataframe",
                    render_dataframe(item),
                    rx.cond(
                        item.output_type == "error",
                        render_error_output(item),
                        # Fallback branch for any unhandled output_type.
                        rx.text("Unknown output type"),
                    ),
                ),
            ),
        )
    )
def render_output(
    output: list[
        Union[
            StreamOutput,
            ExecuteResultOutput,
            DataframeOutput,
            ErrorOutput,
            UnknownOutput,
        ]
    ],
) -> rx.Component:
    """Render a list of cell outputs as a vertical stack.

    Each element is delegated to ``render_output_item`` via ``rx.foreach``.
    """
    return rx.vstack(rx.foreach(output, render_output_item))
def test_cli_import_without_all_dependencies():
    """Test that CLI modules can be imported without all datasource dependencies.

    Mapping a module name to ``None`` in ``sys.modules`` makes any attempt to
    import it raise ImportError, simulating an environment where the optional
    package is absent. ``patch.dict`` snapshots and restores ``sys.modules``
    on exit, so the manual save/delete/restore bookkeeping the previous
    version carried around was redundant.
    """
    optional_deps = ["duckdb", "psycopg", "snowflake", "google.cloud.bigquery"]

    with patch.dict("sys.modules", {dep: None for dep in optional_deps}):
        # These imports should succeed even without optional dependencies.
        from ryoma_ai.cli import main
        from ryoma_ai.cli.app import RyomaAI
        from ryoma_ai.cli.command_handler import CommandHandler

        # Verify imports succeeded.
        assert main is not None
        assert RyomaAI is not None
        assert CommandHandler is not None
class PanelGroup(ResizablePanels):
    """Wrapper for the ``PanelGroup`` component of react-resizable-panels."""

    tag = "PanelGroup"

    alias = "ResizablePanelGroup"

    # Unique id to auto-save the group layout via localStorage
    auto_save_id: rx.Var[str]

    # Group orientation
    direction: rx.Var[LiteralDirection]

    # Fired when the group layout changes; forwards the layout payload.
    on_layout: rx.EventHandler[lambda e0: [e0]]

    # not sure how to make this one work
    # storage: rx.Var[Any]
class MySqlDataSource(SqlDataSource):
    """SQL datasource backed by a MySQL server, connected through ibis."""

    def get_query_plan(self, query: str) -> Any:
        # Query-plan retrieval is not implemented for MySQL.
        pass

    def __init__(
        self,
        database: Optional[str] = None,
        db_schema: Optional[str] = None,
        connection_url: Optional[str] = None,
        username: Optional[str] = None,
        password: Optional[str] = None,
        host: Optional[str] = None,
        port: Optional[int] = None,
    ):
        """Store connection parameters; no connection is opened here."""
        super().__init__(database=database, db_schema=db_schema)
        self.connection_url = connection_url
        self.username = username
        self.password = password
        self.host = host
        self.port = port

    def _connect(self, **kwargs):
        """Open an ibis MySQL connection from the stored credentials."""
        try:
            return ibis.mysql.connect(
                user=self.username,
                password=self.password,
                host=self.host,
                port=self.port,
                database=self.database,
                **kwargs,
            )
        except Exception as e:
            # Delegate to the shared connection-error handler from the base class.
            self._handle_connection_error(e, "mysql")

    def connection_string(self):
        """Build the SQLAlchemy-style connection URL used by the crawler."""
        return f"mysql+mysqlconnector://{self.username}:{self.password}@{self.host}:{self.port}/{self.database}"

    def crawl_catalog(self, loader: Loader, where_clause_suffix: Optional[str] = ""):
        """Extract MySQL metadata into *loader* via a databuilder job."""
        # Imported lazily so the extractor dependency is only needed here.
        from databuilder.extractor.mysql_metadata_extractor import (
            MysqlMetadataExtractor,
        )

        logging.info("Crawling data catalog from Mysql")
        prefix = "extractor.mysql_metadata"
        job_config = ConfigFactory.from_dict(
            {
                f"{prefix}.{MysqlMetadataExtractor.WHERE_CLAUSE_SUFFIX_KEY}": where_clause_suffix,
                f"{prefix}.{MysqlMetadataExtractor.USE_CATALOG_AS_CLUSTER_NAME}": True,
                f"{prefix}.extractor.sqlalchemy.{SQLAlchemyExtractor.CONN_STRING}": self.connection_string(),
            }
        )
        job = DefaultJob(
            conf=job_config,
            task=DefaultTask(extractor=MysqlMetadataExtractor(), loader=loader),
        )
        job.launch()
#
# For the full list of built-in configuration values, see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information

import os
import sys
import warnings

# Make modules that live next to conf.py importable by Sphinx extensions.
sys.path.append(os.path.abspath(os.path.dirname(__file__)))

project = "Ryoma"
copyright = "2024, WuHen-Li"
author = "WuHen-Li"
release = "v1.0.0-beta"

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

extensions = [
    "myst_parser",
    "sphinx_copybutton",
    "sphinx_exec_code",
    "sphinx_tabs.tabs",
    "sphinx.ext.autodoc",
    "sphinx.ext.autosummary",
    "sphinx.ext.coverage",
    "sphinx.ext.doctest",
    "sphinx.ext.githubpages",
    "sphinx.ext.graphviz",
    "sphinx.ext.ifconfig",
    "sphinx.ext.intersphinx",
    "sphinx.ext.mathjax",
    "sphinx.ext.napoleon",
    "sphinx.ext.viewcode",
]

myst_enable_extensions = [
    "amsmath",
    "attrs_inline",
    "colon_fence",
    "deflist",
    "dollarmath",
    "fieldlist",
    "html_admonition",
    "html_image",
    "linkify",
    "replacements",
    "smartquotes",
    "strikethrough",
    "substitution",
    "tasklist",
]

coverage_show_missing_items = True
exclude_patterns = []
graphviz_output_format = "svg"
html_css_files = ["css/custom.css"]
html_favicon = "modelinfer.png"  # NOTE(review): logos below use ryoma.png — confirm this favicon is intended
html_sidebars = {}
html_static_path = ["_static"]
html_theme = "furo"
language = "en"
mathdef_link_only = True
master_doc = "index"
pygments_style = "default"
source_suffix = [".rst", ".md"]
templates_path = ["_templates"]

html_context = {
    "default_mode": "auto",  # auto: the documentation theme will follow the system default that you have set (light or dark)
}

html_theme_options = {
    "light_logo": "ryoma.png",
    "dark_logo": "ryoma.png",
}

intersphinx_mapping = {
    "numpy": ("https://numpy.org/doc/stable/", None),
    # Fixed: the previous f-string interpolated the float literal 3.10, which
    # formats as "3.1" and pointed intersphinx at the Python 3.1 documentation.
    "python": ("https://docs.python.org/3.10/", None),
    "scipy": ("https://docs.scipy.org/doc/scipy/", None),
    "torch": ("https://pytorch.org/docs/stable/", None),
}

# NOTE(review): sphinx_gallery_conf is set but "sphinx_gallery" is not listed
# in extensions above — confirm whether the gallery extension should be enabled.
sphinx_gallery_conf = {
    "examples_dirs": ["examples"],
    "gallery_dirs": ["auto_examples", "auto_tutorial"],
    "capture_repr": ("_repr_html_", "__repr__"),
    "ignore_repr_types": r"matplotlib.text|matplotlib.axes",
}

warnings.filterwarnings("ignore", category=FutureWarning)