├── tests ├── __init__.py ├── e2e │ ├── __init__.py │ ├── ryoma_ai │ │ ├── __init__.py │ │ ├── test_llm.py │ │ ├── test_datasource.py │ │ └── test_agent.py │ └── download_gpt4all_model.py └── unit_tests │ ├── __init__.py │ ├── test_cli.py │ ├── datasource │ └── test_duckdb.py │ ├── test_prompt_template.py │ ├── test_catalog.py │ ├── test_datasource.py │ ├── test_agent.py │ ├── test_tool.py │ ├── test_cli_fixed.py │ └── test_lazy_imports.py ├── packages ├── ryoma_lab │ ├── README.md │ ├── ryoma_lab │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── embedding.py │ │ │ ├── datasource.py │ │ │ ├── agent.py │ │ │ ├── tool.py │ │ │ ├── kernel.py │ │ │ ├── vector_store.py │ │ │ ├── prompt.py │ │ │ ├── cell.py │ │ │ └── data_catalog.py │ │ ├── components │ │ │ ├── __init__.py │ │ │ ├── workspace │ │ │ │ ├── __init__.py │ │ │ │ └── cell.py │ │ │ ├── loading_icon.py │ │ │ ├── code_editor.py │ │ │ ├── table.py │ │ │ ├── upload.py │ │ │ ├── reactflow.py │ │ │ ├── embedding.py │ │ │ ├── modal.py │ │ │ ├── model_selector.py │ │ │ ├── react_rnd.py │ │ │ ├── tool.py │ │ │ └── react_resizable_panels.py │ │ ├── services │ │ │ ├── kernel │ │ │ │ ├── __init__.py │ │ │ │ ├── pythonkernel.py │ │ │ │ ├── service.py │ │ │ │ ├── base.py │ │ │ │ └── sqlkernel.py │ │ │ ├── embedding.py │ │ │ └── user.py │ │ ├── __init__.py │ │ ├── templates │ │ │ └── __init__.py │ │ ├── pages │ │ │ ├── __init__.py │ │ │ ├── document.py │ │ │ └── settings.py │ │ ├── ryoma_lab.py │ │ ├── states │ │ │ ├── utils.py │ │ │ ├── ai.py │ │ │ ├── tool.py │ │ │ └── base.py │ │ ├── cli.py │ │ └── styles.py │ ├── setup.py │ └── pyproject.toml └── ryoma_ai │ ├── ryoma_ai │ ├── api │ │ └── __init__.py │ ├── llm │ │ └── __init__.py │ ├── embedding │ │ ├── __init__.py │ │ ├── config.py │ │ ├── factory.py │ │ └── client.py │ ├── datasource │ │ ├── __init__.py │ │ ├── sqlite.py │ │ ├── nosql.py │ │ ├── bigquery.py │ │ ├── base.py │ │ ├── file.py │ │ ├── duckdb.py │ │ └── mysql.py │ ├── vector_store │ │ └── __init__.py │ ├── cli │ 
│ ├── main.py │ │ └── __init__.py │ ├── __init__.py │ ├── tool │ │ ├── __init__.py │ │ ├── pyarrow_tool.py │ │ ├── pandas_tool.py │ │ ├── spark_tool.py │ │ └── python_tool.py │ ├── catalog │ │ ├── exceptions.py │ │ └── __init__.py │ ├── store │ │ ├── __init__.py │ │ ├── config.py │ │ └── exceptions.py │ ├── prompt │ │ ├── base.py │ │ └── __init__.py │ ├── agent │ │ ├── __init__.py │ │ ├── python_agent.py │ │ ├── arrow_agent.py │ │ ├── embedding.py │ │ ├── internals │ │ │ ├── sql_log_agent.py │ │ │ └── metadata_agent.py │ │ ├── factory.py │ │ ├── pandas_agent.py │ │ ├── spark_agent.py │ │ └── kernel_node.py │ ├── models │ │ ├── agent.py │ │ ├── catalog.py │ │ └── datasource.py │ └── states.py │ ├── setup.py │ ├── tests │ └── llm │ │ └── __init__.py │ ├── .editorconfig │ ├── README.md │ └── pyproject.toml ├── research ├── .gitignore ├── azure_openai.py └── nn │ └── transformer_classifier.py ├── alembic ├── README ├── script.py.mako ├── versions │ └── 34dd3ed73def_.py └── env.py ├── docs ├── source │ ├── ryoma-lab │ │ ├── data-source.md │ │ ├── index.md │ │ ├── chat.md │ │ └── ryomalab.md │ ├── reference │ │ ├── data-sources │ │ │ ├── bigquery.md │ │ │ ├── file.md │ │ │ ├── snowflake.md │ │ │ ├── postgresql.md │ │ │ └── index.md │ │ ├── tool │ │ │ ├── IPython.md │ │ │ ├── pandas.md │ │ │ ├── sql.md │ │ │ ├── pyspark.md │ │ │ ├── pyarrow.md │ │ │ └── index.md │ │ ├── agent │ │ │ ├── python.md │ │ │ ├── spark.md │ │ │ ├── pyarrow.md │ │ │ └── pandas.md │ │ ├── index.md │ │ └── api │ │ │ └── index.md │ ├── _static │ │ ├── ryoma.png │ │ └── css │ │ │ └── custom.css │ ├── architecture │ │ ├── img.png │ │ ├── img_1.png │ │ ├── Architecture_v1.png │ │ ├── multi-agent-routing-clean.png │ │ ├── enhanced-sql-agent-workflow.png │ │ ├── multi-agent-routing-system.png │ │ ├── reforce-sql-agent-workflow.png │ │ ├── architecture.md │ │ ├── enhanced-sql-agent-workflow.mmd │ │ ├── index.md │ │ ├── reforce-sql-agent-workflow.mmd │ │ └── multi-agent-routing-clean.mmd │ ├── tech-specs │ 
│ ├── Architecture_v1.png │ │ ├── index.md │ │ └── tech_spec_v1.md │ ├── roadmap │ │ ├── index.md │ │ └── roadmap.md │ ├── contribution │ │ ├── index.md │ │ └── contribution.md │ ├── installation │ │ └── index.md │ ├── requirements.txt │ ├── getting-started │ │ └── index.md │ └── conf.py ├── Makefile └── make.bat ├── .gitbook.yaml ├── assets ├── ui.png ├── favicon.ico ├── paneleft.svg ├── images │ └── coverage.svg ├── chakra_color_mode_provider.js ├── github.svg ├── aita_black.svg └── aita_white.svg ├── package.json ├── setup.cfg ├── .claude └── settings.local.json ├── examples ├── file_example.py ├── example_arrow.py └── e2e_example.py ├── .github ├── ISSUE_TEMPLATE │ ├── config.yml │ ├── question.md │ ├── feature_request.md │ └── bug_report.md ├── workflows │ ├── release-drafter.yml │ ├── build.yml │ ├── gitpages.yml │ └── main_ryoma-demo.yml ├── .stale.yml ├── release-drafter.yml ├── dependabot.yml └── PULL_REQUEST_TEMPLATE.md ├── CHANGELOG.md ├── config.yaml ├── Dockerfile ├── .dockerignore ├── rxconfig.py ├── .pre-commit-config.yaml ├── SECURITY.md ├── scripts └── langchain_test.py ├── example_config.json ├── TODOs.md └── pyproject.toml /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/e2e/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/ryoma_lab/README.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/unit_tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /research/.gitignore: 
-------------------------------------------------------------------------------- 1 | papers 2 | -------------------------------------------------------------------------------- /tests/e2e/ryoma_ai/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/api/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/llm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/embedding/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /alembic/README: -------------------------------------------------------------------------------- 1 | Generic single-database configuration. 
-------------------------------------------------------------------------------- /docs/source/ryoma-lab/data-source.md: -------------------------------------------------------------------------------- 1 | # Data Source -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/datasource/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/vector_store/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/components/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/source/reference/data-sources/bigquery.md: -------------------------------------------------------------------------------- 1 | # Bigquery -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/services/kernel/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/source/reference/data-sources/file.md: -------------------------------------------------------------------------------- 1 | # File Data Source -------------------------------------------------------------------------------- /packages/ryoma_ai/setup.py: -------------------------------------------------------------------------------- 1 | __import__("setuptools").setup() 2 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/components/workspace/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/ryoma_lab/setup.py: -------------------------------------------------------------------------------- 1 | __import__("setuptools").setup() 2 | -------------------------------------------------------------------------------- /docs/source/reference/data-sources/snowflake.md: -------------------------------------------------------------------------------- 1 | # Snowflake Data Source -------------------------------------------------------------------------------- /.gitbook.yaml: -------------------------------------------------------------------------------- 1 | root: ./docs/ 2 | 3 | structure: 4 | readme: README.md 5 | -------------------------------------------------------------------------------- /docs/source/reference/data-sources/postgresql.md: -------------------------------------------------------------------------------- 1 | # Postgres Data Source 2 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/__init__.py: -------------------------------------------------------------------------------- 1 | """Base template for Reflex.""" 2 | -------------------------------------------------------------------------------- /assets/ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-ryoma/ryoma/HEAD/assets/ui.png -------------------------------------------------------------------------------- /assets/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-ryoma/ryoma/HEAD/assets/favicon.ico -------------------------------------------------------------------------------- /packages/ryoma_ai/tests/llm/__init__.py: 
-------------------------------------------------------------------------------- 1 | """ 2 | Tests for LLM provider functionality. 3 | """ 4 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "@mermaid-js/mermaid-cli": "^11.9.0" 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /docs/source/_static/ryoma.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-ryoma/ryoma/HEAD/docs/source/_static/ryoma.png -------------------------------------------------------------------------------- /docs/source/architecture/img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-ryoma/ryoma/HEAD/docs/source/architecture/img.png -------------------------------------------------------------------------------- /docs/source/architecture/img_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-ryoma/ryoma/HEAD/docs/source/architecture/img_1.png -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [darglint] 2 | # https://github.com/terrencepreilly/darglint 3 | strictness = long 4 | docstring_style = google 5 | -------------------------------------------------------------------------------- /.claude/settings.local.json: -------------------------------------------------------------------------------- 1 | { 2 | "permissions": { 3 | "allow": [ 4 | "Bash(python:*)" 5 | ], 6 | "deny": [] 7 | } 8 | } -------------------------------------------------------------------------------- /docs/source/architecture/Architecture_v1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-ryoma/ryoma/HEAD/docs/source/architecture/Architecture_v1.png -------------------------------------------------------------------------------- /docs/source/tech-specs/Architecture_v1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-ryoma/ryoma/HEAD/docs/source/tech-specs/Architecture_v1.png -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/templates/__init__.py: -------------------------------------------------------------------------------- 1 | from .template import ThemeState, template 2 | 3 | __all__ = ["ThemeState", "template"] 4 | -------------------------------------------------------------------------------- /docs/source/architecture/multi-agent-routing-clean.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-ryoma/ryoma/HEAD/docs/source/architecture/multi-agent-routing-clean.png -------------------------------------------------------------------------------- /docs/source/architecture/enhanced-sql-agent-workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-ryoma/ryoma/HEAD/docs/source/architecture/enhanced-sql-agent-workflow.png -------------------------------------------------------------------------------- /docs/source/architecture/multi-agent-routing-system.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-ryoma/ryoma/HEAD/docs/source/architecture/multi-agent-routing-system.png -------------------------------------------------------------------------------- /docs/source/architecture/reforce-sql-agent-workflow.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/project-ryoma/ryoma/HEAD/docs/source/architecture/reforce-sql-agent-workflow.png -------------------------------------------------------------------------------- /docs/source/reference/tool/IPython.md: -------------------------------------------------------------------------------- 1 | # PythonTool 2 | 3 | PythonTool is a tool that allows you to run python script in IPython Kernel ([IPython](https://ipython.org/)). -------------------------------------------------------------------------------- /docs/source/roadmap/index.md: -------------------------------------------------------------------------------- 1 | (ryoma-roadmap)= 2 | 3 | # Roadmap 4 | 5 | Ryoma roadmap. 6 | 7 | ```{toctree} 8 | :maxdepth: 2 9 | 10 | roadmap 11 | ``` 12 | -------------------------------------------------------------------------------- /examples/file_example.py: -------------------------------------------------------------------------------- 1 | from ryoma_ai.datasource.file import FileDataSource 2 | 3 | f = FileDataSource("./creditcard.csv") 4 | 5 | ds = f.to_arrow(format="csv") 6 | 7 | ds.to_table() 8 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | # Configuration: https://help.github.com/en/github/building-a-strong-community/configuring-issue-templates-for-your-repository 2 | 3 | blank_issues_enabled: false 4 | -------------------------------------------------------------------------------- /docs/source/contribution/index.md: -------------------------------------------------------------------------------- 1 | (ryoma-contribution)= 2 | 3 | # Contribution 4 | 5 | Ryoma contributed documentation. 
6 | 7 | ```{toctree} 8 | :maxdepth: 2 9 | 10 | contribution 11 | ``` 12 | -------------------------------------------------------------------------------- /docs/source/installation/index.md: -------------------------------------------------------------------------------- 1 | (ryoma-installation)= 2 | 3 | # Installation 4 | 5 | Ryoma installation instructions. 6 | 7 | ```{toctree} 8 | :maxdepth: 2 9 | 10 | installation 11 | ``` 12 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## [project-title] Changelog 2 | 3 | 4 | # x.y.z (yyyy-mm-dd) 5 | 6 | *Features* 7 | * ... 8 | 9 | *Bug Fixes* 10 | * ... 11 | 12 | *Breaking Changes* 13 | * ... 14 | -------------------------------------------------------------------------------- /docs/source/reference/tool/pandas.md: -------------------------------------------------------------------------------- 1 | # PandasTool 2 | Pandas tools are used by PandasAgent to interact with the data leveraging Pandas API. 3 | 4 | ## Source 5 | 6 | * [Pandas Tool](../../../ryoma_ai/tool/pandas.py) -------------------------------------------------------------------------------- /docs/source/reference/tool/sql.md: -------------------------------------------------------------------------------- 1 | # Sql Tools 2 | Sql Tools are used to interact with sql databases, and used by `SqlAgent` to query sql queries. 
3 | 4 | ## Source 5 | * [sql tool](../../../ryoma_ai/tool/sql_tool.py) -------------------------------------------------------------------------------- /docs/source/requirements.txt: -------------------------------------------------------------------------------- 1 | furo 2 | markupsafe 3 | matplotlib 4 | myst-parser[linkify] 5 | pillow 6 | pydot 7 | sphinx-copybutton 8 | sphinx-exec-code 9 | sphinx-gallery 10 | sphinx-tabs 11 | sphinx 12 | -------------------------------------------------------------------------------- /docs/source/ryoma-lab/index.md: -------------------------------------------------------------------------------- 1 | (ryoma-aita-lab)= 2 | 3 | # Aita-lab 4 | 5 | Ryoma aita-lab documentation. 6 | 7 | ```{toctree} 8 | :maxdepth: 2 9 | 10 | chat 11 | data-source 12 | ryomalab 13 | ``` 14 | -------------------------------------------------------------------------------- /docs/source/reference/tool/pyspark.md: -------------------------------------------------------------------------------- 1 | # pyspark tool 2 | Pyspark Tools are used by PysparkAgent to interact with the data leveraging Pyspark API. 3 | 4 | ## Source 5 | * [pyspark tool](../../../ryoma_ai/tool/pyspark.py) 6 | -------------------------------------------------------------------------------- /docs/source/reference/tool/pyarrow.md: -------------------------------------------------------------------------------- 1 | # PyArrow Tool 2 | ## Overview 3 | PyArrow Tools are used by PyArrowAgent to interact with the data leveraging PyArrow API. 4 | 5 | ## Source 6 | * [PyArrow Tool](../../../ryoma_ai/tool/pyarrow.py) -------------------------------------------------------------------------------- /docs/source/tech-specs/index.md: -------------------------------------------------------------------------------- 1 | (ryoma-tech-specs)= 2 | 3 | # Tech specs 4 | 5 | This document describes the technical specifications of the project. 
from typing import Any, Optional

import reflex as rx


class Embedding(rx.Base):
    """Reflex state model describing a selected embedding model."""

    # Identifier of the embedding model — exact format is defined by the
    # callers that construct this model; confirm before relying on it.
    model: str
    # Optional keyword parameters associated with the model selection;
    # values may be None.
    model_parameters: Optional[dict[str, Optional[Any]]] = None
"""AI Powered Data Platform"""

from importlib import metadata as importlib_metadata


def get_version() -> str:
    """Return the installed distribution version of this package.

    Falls back to the string ``"unknown"`` when the package metadata is
    not available (e.g. when running from a source checkout that was
    never installed).
    """
    try:
        found = importlib_metadata.version(__name__)
    except importlib_metadata.PackageNotFoundError:  # pragma: no cover
        found = "unknown"
    return found


# Resolved once at import time so callers can read ``ryoma_ai.version``.
version: str = get_version()
This page lists the data sources that Ryoma supports. Ryoma can connect to these data sources, load data in the background, and answer questions in natural language about the data.
from typing import Optional

import reflex as rx
from ryoma_ai.models.agent import AgentType
from sqlmodel import Field


class Agent(rx.Model, table=True):
    """Database table model for a configured agent."""

    # Primary key; None until assigned (default=None before insert).
    id: Optional[str] = Field(default=None, primary_key=True)
    # Human-readable agent name.
    name: str
    # Optional free-text description of the agent.
    description: Optional[str]
    # Agent category; defaults to the built-in ``ryoma`` type.
    type: Optional[AgentType] = Field(default=AgentType.ryoma)
    # NOTE(review): presumably a serialized workflow definition —
    # confirm the format with callers before depending on it.
    workflow: Optional[str]
# Create a basic Python image that installs Poetry and uses Poetry to install dependencies
import logging
from typing import Optional

from langchain_core.embeddings import Embeddings
from ryoma_ai.llm.provider import load_model_provider

# Module-level logger so records are attributed to this module rather
# than the root logger.
logger = logging.getLogger(__name__)


def get_embedding_client(
    selected_model: str, model_parameters: Optional[dict[str, str]] = None
) -> Embeddings:
    """Create a LangChain embeddings client for the given model.

    Args:
        selected_model: Model identifier understood by
            ``load_model_provider``.
        model_parameters: Optional extra parameters forwarded to the
            provider; ``None`` means provider defaults.

    Returns:
        An ``Embeddings`` client produced by the model provider.
    """
    # Lazy %-style args avoid formatting when INFO is disabled.
    logger.info("Creating embedding client for %s", selected_model)
    return load_model_provider(
        selected_model,
        "embedding",
        model_parameters=model_parameters,
    )
"""Public tool exports for ``ryoma_ai.tool``.

Re-exports the concrete tool classes from their submodules so callers
can import them directly from this package.
"""

from ryoma_ai.tool.pandas_tool import PandasTool
from ryoma_ai.tool.pyarrow_tool import ArrowTool
from ryoma_ai.tool.python_tool import PythonTool
from ryoma_ai.tool.spark_tool import ConvertPandasToSparkTool, SparkTool
from ryoma_ai.tool.sql_tool import QueryProfileTool, SqlQueryTool

# Names exposed via ``from ryoma_ai.tool import *``.
__all__ = [
    "PandasTool",
    "ArrowTool",
    "PythonTool",
    "ConvertPandasToSparkTool",
    "SparkTool",
    "QueryProfileTool",
    "SqlQueryTool",
]
14 | """ 15 | with open("README.md", encoding="utf-8") as readme: 16 | content = readme.read() 17 | return rx.markdown(content, component_map=styles.markdown_style) 18 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | # Git 2 | .git 3 | .gitignore 4 | .github 5 | 6 | # Docker 7 | .dockerignore 8 | 9 | # IDE 10 | .idea 11 | .vscode 12 | 13 | # Byte-compiled / optimized / DLL files 14 | __pycache__/ 15 | **/__pycache__/ 16 | *.pyc 17 | *.pyo 18 | *.pyd 19 | .Python 20 | *.py[cod] 21 | *$py.class 22 | .pytest_cache/ 23 | ..mypy_cache/ 24 | 25 | # poetry 26 | .venv 27 | 28 | # C extensions 29 | *.so 30 | 31 | # Virtual environment 32 | .venv 33 | venv 34 | 35 | .DS_Store 36 | .AppleDouble 37 | .LSOverride 38 | ._* 39 | 40 | # exclude research in docker build 41 | research 42 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/models/kernel.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import reflex as rx 4 | from pydantic import Field 5 | from ryoma_lab.models.tool import Tool, ToolOutput 6 | 7 | 8 | class Kernel(rx.Model, table=True): 9 | datasource: Optional[str] = Field(None, description="Name of the datasource") 10 | type: str 11 | tool: Optional[str] = Field(None, description="Name of the tool") 12 | output: Optional[str] = Field(None, description="Output of the tool") 13 | 14 | 15 | class ToolKernel(rx.Base): 16 | tool: Tool 17 | output: ToolOutput 18 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/catalog/exceptions.py: -------------------------------------------------------------------------------- 1 | """ 2 | Exception classes for catalog operations. 
3 | """ 4 | 5 | from typing import Optional 6 | 7 | 8 | class CatalogIndexError(Exception): 9 | """Raised when catalog indexing operations fail.""" 10 | 11 | def __init__( 12 | self, operation: str, catalog_id: str, cause: Optional[Exception] = None 13 | ): 14 | message = f"Failed to {operation} catalog '{catalog_id}'" 15 | super().__init__(message) 16 | self.operation = operation 17 | self.catalog_id = catalog_id 18 | self.cause = cause 19 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/store/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Store module for managing data sources and catalogs using LangChain stores. 3 | """ 4 | 5 | from .catalog_store import CatalogIndex, CatalogStore 6 | from .data_source_store import DataSourceRegistration, DataSourceStore 7 | from .exceptions import CatalogNotFoundError, DataSourceNotFoundError, StoreException 8 | 9 | __all__ = [ 10 | "DataSourceStore", 11 | "DataSourceRegistration", 12 | "CatalogStore", 13 | "CatalogIndex", 14 | "StoreException", 15 | "DataSourceNotFoundError", 16 | "CatalogNotFoundError", 17 | ] 18 | -------------------------------------------------------------------------------- /docs/source/reference/agent/spark.md: -------------------------------------------------------------------------------- 1 | # Spark Agent 2 | 3 | Spark agent is an Ryoma agent specialize in writing spark code. 
4 | 5 | ## Example 6 | 7 | 8 | ```python 9 | from ryoma_ai.agent.pyspark import PySparkAgent 10 | from ryoma_ai.datasource.postgres import PostgresDataSource 11 | 12 | datasource = PostgresDataSource("postgresql://localhost:5432/db") 13 | spark_configs = { 14 | "master": "local", 15 | "appName": "Ryoma" 16 | } 17 | spark_agent = PySparkAgent(spark_configs, "gpt-3.5-turbo") 18 | 19 | spark_agent.stream("I want to get the top customers which making the most purchases") 20 | ``` 21 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/components/loading_icon.py: -------------------------------------------------------------------------------- 1 | import reflex as rx 2 | 3 | 4 | class LoadingIcon(rx.Component): 5 | """A custom loading icon component.""" 6 | 7 | library = "react-loading-icons" 8 | tag = "SpinningCircles" 9 | stroke: rx.Var[str] 10 | stroke_opacity: rx.Var[str] 11 | fill: rx.Var[str] 12 | fill_opacity: rx.Var[str] 13 | stroke_width: rx.Var[str] 14 | speed: rx.Var[str] 15 | height: rx.Var[str] 16 | 17 | def get_event_triggers(self) -> dict: 18 | return {"on_change": lambda status: [status]} 19 | 20 | 21 | loading_icon = LoadingIcon.create 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: ❓ Question 3 | about: Ask a question about this project 🎓 4 | title: '' 5 | labels: question 6 | assignees: 7 | --- 8 | 9 | ## Checklist 10 | 11 | 12 | 13 | - [ ] I've searched the project's [`issues`](https://github.com/ryoma/ryoma/issues?q=is%3Aissue). 14 | 15 | ## ❓ Question 16 | 17 | 18 | 19 | How can I [...]? 20 | 21 | Is it possible to [...]? 
22 | 23 | ## 📎 Additional context 24 | 25 | 26 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/prompt/base.py: -------------------------------------------------------------------------------- 1 | from langchain_core.prompts import ChatPromptTemplate 2 | 3 | BasePromptTemplate = ChatPromptTemplate.from_messages( 4 | messages=[ 5 | ( 6 | "system", 7 | """ 8 | You are an expert in the field of data science, analysis, and data engineering. 9 | """, 10 | ) 11 | ] 12 | ) 13 | 14 | BasicContextPromptTemplate = ChatPromptTemplate.from_messages( 15 | messages=[ 16 | ( 17 | "system", 18 | """ 19 | You are provided with the following context: 20 | {prompt_context} 21 | 22 | """, 23 | ) 24 | ] 25 | ) 26 | -------------------------------------------------------------------------------- /tests/unit_tests/test_cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """ 3 | Quick test script for the Ryoma SQL CLI in development mode. 
4 | """ 5 | 6 | import sys 7 | from pathlib import Path 8 | 9 | from ryoma_ai.cli.main import main 10 | 11 | # Add the ryoma_ai package to Python path 12 | ryoma_ai_path = Path(__file__).parent / "packages" / "ryoma_ai" 13 | sys.path.insert(0, str(ryoma_ai_path)) 14 | 15 | if __name__ == "__main__": 16 | # You can modify sys.argv to test different arguments 17 | # sys.argv = ["test_cli.py", "--setup"] # Test setup mode 18 | # sys.argv = ["test_cli.py", "--help"] # Test help 19 | 20 | main() 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 🚀 Feature request 3 | about: Suggest an idea for this project 🏖 4 | title: '' 5 | labels: enhancement 6 | assignees: 7 | --- 8 | 9 | ## 🚀 Feature Request 10 | 11 | 12 | 13 | ## 🔈 Motivation 14 | 15 | 16 | 17 | ## 🛰 Alternatives 18 | 19 | 20 | 21 | ## 📎 Additional context 22 | 23 | 24 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/catalog/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Catalog management module for Ryoma AI. 3 | 4 | This module provides unified catalog indexing and search functionality. 
5 | """ 6 | 7 | from ryoma_ai.catalog.exceptions import CatalogIndexError 8 | from ryoma_ai.catalog.indexer import ( 9 | CatalogIndexer, 10 | HierarchicalCatalogIndexer, 11 | IndexLevel, 12 | UnifiedCatalogIndexService, 13 | VectorIndexer, 14 | ) 15 | 16 | __all__ = [ 17 | "CatalogIndexer", 18 | "CatalogIndexError", 19 | "HierarchicalCatalogIndexer", 20 | "IndexLevel", 21 | "UnifiedCatalogIndexService", 22 | "VectorIndexer", 23 | ] 24 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/agent/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Ryoma AI Agent Module 3 | 4 | This module provides various types of AI agents including chat agents, 5 | workflow agents, and SQL agents with factory pattern support. 6 | """ 7 | 8 | from .base import BaseAgent 9 | from .chat_agent import ChatAgent 10 | from .sql import BasicSqlAgent, EnhancedSqlAgentImpl, ReFoRCESqlAgentImpl, SqlAgent 11 | from .workflow import ToolMode, WorkflowAgent 12 | 13 | __all__ = [ 14 | "BaseAgent", 15 | "ChatAgent", 16 | "WorkflowAgent", 17 | "ToolMode", 18 | "SqlAgent", 19 | "BasicSqlAgent", 20 | "EnhancedSqlAgentImpl", 21 | "ReFoRCESqlAgentImpl", 22 | ] 23 | -------------------------------------------------------------------------------- /docs/source/getting-started/index.md: -------------------------------------------------------------------------------- 1 | (ryoma-getting-started)= 2 | 3 | # 🚀 Getting Started 4 | 5 | Welcome to Ryoma! This guide will help you get up and running with Ryoma's AI-powered data analysis capabilities. 
6 | 7 | ## 🎯 What You'll Learn 8 | 9 | - **Quick Setup** - Install and configure Ryoma in minutes 10 | - **Basic Usage** - Connect to data sources and ask questions 11 | - **Advanced Features** - Leverage enhanced SQL agents and profiling 12 | - **Best Practices** - Production-ready configurations 13 | 14 | ```{toctree} 15 | :maxdepth: 2 16 | 17 | quickstart 18 | cli-usage 19 | configuration-reference 20 | advanced-setup 21 | troubleshooting 22 | examples 23 | ``` 24 | -------------------------------------------------------------------------------- /docs/source/reference/agent/pyarrow.md: -------------------------------------------------------------------------------- 1 | # Pyarrow Agent 2 | 3 | The Pyarrow agent is an Ryoma agent that runs on the Pyarrow library. 4 | The Pyarrow agent can be used to ask questions in natural language and interact with Pyarrow Tables. 5 | 6 | ## Example 7 | 8 | ```python 9 | from ryoma_ai.agent.pyarrow import PyArrowAgent 10 | import pyarrow as pa 11 | 12 | table = pa.table({ 13 | 'customer_id': pa.array([1, 2, 3, 4, 5]), 14 | 'purchase_amount': pa.array([100, 200, 300, 400, 500]) 15 | }) 16 | 17 | pa_agent = PyArrowAgent("gpt-3.5-turbo") 18 | .add_table(table) 19 | 20 | pa_agent.stream("I want to get the top customers which making the most purchases") 21 | ``` 22 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/cli/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Ryoma AI CLI Module 3 | 4 | Modular, object-oriented CLI implementation for the Ryoma AI multi-agent system. 
5 | """ 6 | 7 | from .agent_manager import AgentManager 8 | from .app import RyomaAI, main 9 | from .catalog_manager import CatalogManager 10 | from .command_handler import CommandHandler 11 | from .config_manager import ConfigManager 12 | from .datasource_manager import DataSourceManager 13 | from .display_manager import DisplayManager 14 | 15 | __all__ = [ 16 | "RyomaAI", 17 | "main", 18 | "ConfigManager", 19 | "DataSourceManager", 20 | "AgentManager", 21 | "DisplayManager", 22 | "CatalogManager", 23 | "CommandHandler", 24 | ] 25 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = source 9 | BUILDDIR = build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/tool/pyarrow_tool.py: -------------------------------------------------------------------------------- 1 | from typing import Type 2 | 3 | from pydantic import BaseModel, Field 4 | from ryoma_ai.tool.python_tool import PythonTool 5 | 6 | 7 | class ArrowInput(BaseModel): 8 | script: str = Field(description="PyArrow analysis script") 9 | 10 | 11 | class ArrowTool(PythonTool): 12 | """Tool for using Apache Arrow in Python.""" 13 | 14 | name: str = "pyarrow_tool" 15 | description: str = """ 16 | Apache Arrow is a cross-language development platform for in-memory data analysis. 17 | This tool allows you to run PyArrow script in Python. 18 | 19 | PyArrow Table is available in the script context. 20 | """ 21 | args_schema: Type[BaseModel] = ArrowInput 22 | -------------------------------------------------------------------------------- /.github/.stale.yml: -------------------------------------------------------------------------------- 1 | # Number of days of inactivity before an issue becomes stale 2 | daysUntilStale: 60 3 | # Number of days of inactivity before a stale issue is closed 4 | daysUntilClose: 7 5 | # Issues with these labels will never be considered stale 6 | exemptLabels: 7 | - pinned 8 | - security 9 | # Label to use when marking an issue as stale 10 | staleLabel: wontfix 11 | # Comment to post when marking an issue as stale. Set to `false` to disable 12 | markComment: > 13 | This issue has been automatically marked as stale because it has not had 14 | recent activity. It will be closed if no further activity occurs. Thank you 15 | for your contributions. 16 | # Comment to post when closing a stale issue. 
Set to `false` to disable 17 | closeComment: false 18 | -------------------------------------------------------------------------------- /alembic/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision | comma,n} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | from typing import Sequence, Union 9 | 10 | from alembic import op 11 | import sqlalchemy as sa 12 | ${imports if imports else ""} 13 | 14 | # revision identifiers, used by Alembic. 15 | revision: str = ${repr(up_revision)} 16 | down_revision: Union[str, None] = ${repr(down_revision)} 17 | branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)} 18 | depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)} 19 | 20 | 21 | def upgrade() -> None: 22 | ${upgrades if upgrades else "pass"} 23 | 24 | 25 | def downgrade() -> None: 26 | ${downgrades if downgrades else "pass"} 27 | -------------------------------------------------------------------------------- /tests/e2e/ryoma_ai/test_llm.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from ryoma_ai.llm.provider import load_model_provider 3 | 4 | 5 | def test_gp44all_model(): 6 | """Test GPT4All model loading - will download model on first use.""" 7 | model_id = "gpt4all:Llama-3.2-1B-Instruct-Q4_0.gguf" 8 | 9 | # Try to load the model 10 | gp44all_model = load_model_provider(model_id) 11 | 12 | if gp44all_model is None: 13 | pytest.skip( 14 | f"GPT4All model {model_id} not available. Model needs to be downloaded first." 
15 | ) 16 | 17 | # Test basic functionality 18 | response = gp44all_model.invoke("What is 2+2?") 19 | assert response is not None 20 | assert len(str(response)) > 0 21 | print(f"GPT4All response: {response}") 22 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/datasource/sqlite.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | import ibis 4 | from ibis import BaseBackend 5 | from pydantic import BaseModel, Field 6 | from ryoma_ai.datasource.sql import SqlDataSource 7 | 8 | 9 | class SqliteConfig(BaseModel): 10 | connection_url: str = Field(..., description="Sqlite connection URL") 11 | 12 | 13 | class SqliteDataSource(SqlDataSource): 14 | def get_query_plan(self, query: str) -> Any: 15 | pass 16 | 17 | def crawl_catalog(self, **kwargs): 18 | pass 19 | 20 | def __init__(self, connection_url: str): 21 | super().__init__() 22 | self.connection_url = connection_url 23 | 24 | def _connect(self) -> BaseBackend: 25 | return ibis.sqlite.connect(self.connection_url) 26 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/components/code_editor.py: -------------------------------------------------------------------------------- 1 | import reflex as rx 2 | 3 | 4 | class ReactCodeMirror(rx.Component): 5 | library = "@uiw/react-codemirror" 6 | 7 | lib_dependencies: list[str] = ["@uiw/codemirror-extensions-langs"] 8 | 9 | tag = "CodeMirror" 10 | 11 | is_default = True 12 | 13 | value: rx.Var[str] 14 | 15 | height: rx.Var[str] 16 | 17 | minHeight: rx.Var[str] 18 | 19 | width: rx.Var[str] 20 | 21 | minWidth: rx.Var[str] 22 | 23 | theme: rx.Var[str] 24 | 25 | extensions: rx.Var[str] 26 | on_change: rx.EventHandler[lambda value: [value]] 27 | 28 | def add_imports(self): 29 | return {"@uiw/codemirror-extensions-langs": "loadLanguage"} 30 | 31 | 32 | codeeditor = ReactCodeMirror.create 33 | 
-------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/models/vector_store.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from typing import Optional 3 | 4 | import reflex as rx 5 | 6 | 7 | class DocumentProject(rx.Model, table=True): 8 | """ 9 | Represents a document project/workspace that uses vector storage. 10 | Multiple projects can exist, each with their own document collections. 11 | The actual vector store configuration comes from rxconfig.py. 12 | """ 13 | 14 | project_name: str # Unique identifier for the project/workspace 15 | description: Optional[str] = None # Human-readable description 16 | document_count: int = 0 # Number of documents indexed 17 | created_at: Optional[datetime] = None 18 | updated_at: Optional[datetime] = None 19 | is_active: bool = True 20 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/agent/python_agent.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, Optional 2 | 3 | from ryoma_ai.agent.workflow import WorkflowAgent 4 | from ryoma_ai.tool.python_tool import PythonTool 5 | 6 | 7 | class PythonAgent(WorkflowAgent): 8 | description: str = "A Python agent that can use Python tools to run python scripts." 
9 | 10 | def __init__( 11 | self, 12 | model: str, 13 | model_parameters: Optional[Dict] = None, 14 | ): 15 | super().__init__([PythonTool()], model, model_parameters) 16 | 17 | def add_script_context(self, script_context): 18 | for tool in self.tools: 19 | if isinstance(tool, PythonTool): 20 | tool.update_script_context(script_context=script_context) 21 | return self 22 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/datasource/nosql.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | try: 4 | import boto3 5 | except ImportError: 6 | boto3 = None 7 | 8 | from ryoma_ai.datasource.base import DataSource 9 | from ryoma_ai.datasource.metadata import Catalog 10 | 11 | 12 | class DynamodbDataSource(DataSource): 13 | def __init__(self, name: str, region_name: str = None, **kwargs): 14 | super().__init__(name=name, type="nosql", **kwargs) 15 | self.region_name = region_name 16 | self.client = boto3.client("dynamodb", region_name=region_name) 17 | 18 | def get_catalog(self, table_name: str) -> List[Catalog]: 19 | response = self.client.describe_table(TableName=table_name) 20 | return response["Table"] 21 | 22 | 23 | class DynamodbConfig: 24 | pass 25 | -------------------------------------------------------------------------------- /tests/unit_tests/datasource/test_duckdb.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pytest 3 | from ryoma_ai.datasource.duckdb import DuckDBDataSource 4 | 5 | 6 | @pytest.fixture 7 | def test_pandas_df(): 8 | return pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) 9 | 10 | 11 | def test_query_with_register(test_pandas_df): 12 | data_source = DuckDBDataSource() 13 | data_source.register("pdf", test_pandas_df) 14 | query = "SELECT * FROM pdf" 15 | result = data_source.query(query) 16 | assert result.shape == test_pandas_df.shape 17 | 18 | 19 | 
def test_query(test_pandas_df): 20 | data_source = DuckDBDataSource() 21 | pdf = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) 22 | result = data_source.query("SELECT * FROM pdf", pdf=pdf) 23 | assert result.shape == test_pandas_df.shape 24 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/services/user.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | 3 | import reflex as rx 4 | from ryoma_lab.states.base import User 5 | 6 | 7 | class UserService: 8 | def __init__(self): 9 | self.session = rx.session() 10 | 11 | def __enter__(self): 12 | return self 13 | 14 | def __exit__(self, exc_type, exc_val, exc_tb): 15 | self.session.close() 16 | 17 | def create_user( 18 | self, username: str, email: str, hashed_password: str, permissions: dict 19 | ): 20 | user = User( 21 | id=str(uuid.uuid4()), 22 | username=username, 23 | email=email, 24 | hashed_password=hashed_password, 25 | permissions=permissions, 26 | ) 27 | self.session.add(user) 28 | self.session.commit() 29 | -------------------------------------------------------------------------------- /assets/paneleft.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /docs/source/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | @import "../basic.css"; 2 | 3 | html[data-theme="dark"] { 4 | --pst-color-text-base: #5c8fda; 5 | } 6 | 7 | .d2h-del { 8 | background-color: var(--pst-color-danger-highlight); 9 | color: var(--pst-color-text-base); 10 | } 11 | 12 | .d2h-ins { 13 | background-color: var(--pst-color-success); 14 | color: var(--pst-color-text-base); 15 | } 16 | 17 | .d2h-change { 18 | background-color: var(--yellow); 19 | color: var(--pst-color-text-base); 20 | } 21 | 22 | 
.sphinx-tabs-panel { 23 | background-color: var(--pst-color-background); 24 | } 25 | 26 | .sphinx-tabs-tab { 27 | background-color: var(--pst-color-background); 28 | } 29 | 30 | .sphinx-tabs { 31 | background-color: var(--pst-color-background); 32 | } 33 | 34 | .closeable{ 35 | background-color: var(--pst-color-background); 36 | } 37 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/services/kernel/pythonkernel.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | from IPython.core.interactiveshell import InteractiveShell 4 | from ryoma_lab.services.kernel.base import BaseKernel 5 | 6 | 7 | class PythonKernel(BaseKernel): 8 | def execute(self, code: str) -> Dict[str, Any]: 9 | shell = InteractiveShell.instance() 10 | result = shell.run_cell(code, store_history=False) 11 | 12 | if result.success: 13 | return self._create_success_response(result.result) 14 | elif result.error_before_exec: 15 | return self._create_error_response(result.error_before_exec) 16 | elif result.error_in_exec: 17 | return self._create_error_response(result.error_in_exec) 18 | else: 19 | return self._create_error_response(Exception("An unknown error occurred")) 20 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/tool/pandas_tool.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Type 2 | 3 | from pydantic import BaseModel, Field 4 | from ryoma_ai.datasource.sql import SqlDataSource 5 | from ryoma_ai.tool.python_tool import PythonTool 6 | 7 | 8 | class PandasInput(BaseModel): 9 | script: str = Field(description="pandas script") 10 | 11 | 12 | class PandasTool(PythonTool): 13 | """Tool for running Pandas analysis.""" 14 | 15 | name: str = "pandas_tool" 16 | description: str = """ 17 | Run a python script by using the Pandas library. 
18 | If the script is not correct, an error message will be returned. 19 | 20 | Pandas dataframes are stored in the script context. 21 | """ 22 | datasource: Optional[SqlDataSource] = Field( 23 | None, exclude=True, description="SQL data source" 24 | ) 25 | 26 | args_schema: Type[BaseModel] = PandasInput 27 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/components/table.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import reflex as rx 4 | 5 | 6 | def table(tabular_data: list[list]): 7 | return rx.table.root( 8 | rx.table.header( 9 | rx.table.row( 10 | *[rx.table.column_header_cell(cell) for cell in tabular_data[0]], 11 | ), 12 | ), 13 | rx.table.body( 14 | *[ 15 | rx.table.row( 16 | *[ 17 | ( 18 | rx.table.row_header_cell(cell) 19 | if i == 0 20 | else rx.table.cell(cell) 21 | ) 22 | for i, cell in enumerate(row) 23 | ], 24 | ) 25 | for row in tabular_data[1:] 26 | ], 27 | ), 28 | ) 29 | -------------------------------------------------------------------------------- /docs/source/contribution/contribution.md: -------------------------------------------------------------------------------- 1 | # Contribution 2 | 3 | ## How to contribute 4 | 5 | 1. Fork & Clone the repository 6 | 7 | 2. Create a new branch 8 | 9 | ## Environment setup 10 | 11 | We are using python 3.9+ for this project. 
You can install the required packages by running the following command: 12 | 13 | ```bash 14 | make install 15 | ``` 16 | 17 | ## Running the tests 18 | 19 | You can run the tests by running the following command: 20 | 21 | ```bash 22 | make test 23 | ``` 24 | 25 | ## Check and fix the code style 26 | 27 | You can check the code style by running the following command: 28 | 29 | ```bash 30 | make check-codestyle 31 | ``` 32 | 33 | to fix the code style run the following command: 34 | 35 | ```bash 36 | make codestyle 37 | ``` 38 | 39 | ## Build the project 40 | 41 | You can build the project by running the following command: 42 | 43 | ```bash 44 | make build 45 | ``` 46 | -------------------------------------------------------------------------------- /tests/unit_tests/test_prompt_template.py: -------------------------------------------------------------------------------- 1 | from ryoma_ai.prompt.prompt_template import PromptTemplateFactory 2 | 3 | 4 | def test_base_prompt_template(): 5 | ryoma_prompt = PromptTemplateFactory() 6 | ryoma_prompt.set_base_template("This is a test prompt.") 7 | template = ryoma_prompt.build_prompt() 8 | messages = template.format_messages() 9 | assert messages[0].content == "This is a test prompt." 10 | 11 | 12 | def test_prompt_template(): 13 | ryoma_prompt = PromptTemplateFactory() 14 | ryoma_prompt.add_context_template( 15 | "You are provided with the following context: {prompt_context}" 16 | ) 17 | template = ryoma_prompt.build_prompt() 18 | messages = template.format_messages(prompt_context="This is a test context.") 19 | assert ( 20 | messages[1].content 21 | == "You are provided with the following context: This is a test context." 
22 | ) 23 | -------------------------------------------------------------------------------- /.github/release-drafter.yml: -------------------------------------------------------------------------------- 1 | # Release drafter configuration https://github.com/release-drafter/release-drafter#configuration 2 | # Emojis were chosen to match the https://gitmoji.carloscuesta.me/ 3 | 4 | name-template: "v$NEXT_PATCH_VERSION" 5 | tag-template: "v$NEXT_PATCH_VERSION" 6 | 7 | categories: 8 | - title: ":rocket: Features" 9 | labels: [enhancement, feature] 10 | - title: ":wrench: Fixes & Refactoring" 11 | labels: [bug, refactoring, bugfix, fix] 12 | - title: ":package: Build System & CI/CD" 13 | labels: [build, ci, testing] 14 | - title: ":boom: Breaking Changes" 15 | labels: [breaking] 16 | - title: ":pencil: Documentation" 17 | labels: [documentation] 18 | - title: ":arrow_up: Dependencies updates" 19 | labels: [dependencies] 20 | 21 | template: | 22 | ## What’s Changed 23 | 24 | $CHANGES 25 | 26 | ## :busts_in_silhouette: List of contributors 27 | 28 | $CONTRIBUTORS 29 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/models/prompt.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | import reflex as rx 4 | from sqlmodel import Field 5 | 6 | 7 | class PromptTemplate(rx.Model, table=True): 8 | prompt_repr: str = Field( 9 | ..., description="The prompt representation, e.g. SQL, TEXT, etc." 10 | ) 11 | k_shot: int = Field(..., description="The number of examples to use in the prompt.") 12 | example_format: str 13 | selector_type: str = Field( 14 | ..., 15 | description="The type of selector to use for the prompt. e.g. COSSIMILAR, RANDOM, etc.", 16 | ) 17 | prompt_template_name: str = Field( 18 | ..., description="The name of the prompt template." 
19 | ) 20 | prompt_lines: str = Field(..., description="The prompt template lines.") 21 | prompt_template_type: Optional[str] = Field( 22 | default="custom", description="The type of prompt template." 23 | ) 24 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: 🐛 Bug report 3 | about: If something isn't working 🔧 4 | title: '' 5 | labels: bug 6 | assignees: 7 | --- 8 | 9 | ## 🐛 Bug Report 10 | 11 | 12 | 13 | ## 🔬 How To Reproduce 14 | 15 | Steps to reproduce the behavior: 16 | 17 | 1. ... 18 | 19 | ### Code sample 20 | 21 | 22 | 23 | ### Environment 24 | 25 | * OS: [e.g. Linux / Windows / macOS] 26 | * Python version, get it with: 27 | 28 | ```bash 29 | python --version 30 | ``` 31 | 32 | ### Screenshots 33 | 34 | 35 | 36 | ## 📈 Expected behavior 37 | 38 | 39 | 40 | ## 📎 Additional context 41 | 42 | 43 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 
from dataclasses import asdict, dataclass
from datetime import datetime
from typing import Any, Dict


@dataclass
class CatalogIndex:
    """Catalog index metadata.

    Records what was indexed for one data source catalog and when, plus
    simple counts used for reporting.
    """

    catalog_id: str
    data_source_id: str
    catalog_name: str
    indexed_at: datetime
    schema_count: int
    table_count: int
    column_count: int
    index_level: str  # one of: catalog, schema, table, column

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a plain dictionary for storage.

        ``indexed_at`` is serialized as an ISO-8601 string so the result is
        JSON-friendly.
        """
        data = asdict(self)
        data["indexed_at"] = self.indexed_at.isoformat()
        return data

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "CatalogIndex":
        """Create an instance from a dictionary loaded from storage.

        The input mapping is copied first so the caller's dict is not
        mutated (the previous implementation rewrote ``data["indexed_at"]``
        in place as a side effect).
        """
        payload = dict(data)
        payload["indexed_at"] = datetime.fromisoformat(payload["indexed_at"])
        return cls(**payload)
from ryoma_ai.datasource.metadata import Catalog

# Raw catalog metadata as returned by a data source introspection.
_AUTHOR_COLUMNS = [
    ("aid", "INT"),
    ("homepage", "TEXT"),
    ("name", "TEXT"),
    ("oid", "INT"),
]

data = {
    "catalog_name": "main",
    "schemas": [
        {
            "schema_name": "",
            "tables": [
                {
                    "table_name": "author",
                    "columns": [
                        {"name": col_name, "type": col_type, "nullable": 1}
                        for col_name, col_type in _AUTHOR_COLUMNS
                    ],
                }
            ],
        }
    ],
}


def test_catalog_model():
    """The Catalog model should accept and expose the raw metadata dict."""
    catalog = Catalog(**data)
    assert catalog.catalog_name == "main"
    assert len(catalog.schemas) == 1
    assert catalog.schemas[0].schema_name == ""
    assert len(catalog.schemas[0].tables) == 1
from typing import Dict, Optional

import pyarrow as pa
from ryoma_ai.agent.workflow import WorkflowAgent
from ryoma_ai.tool.pyarrow_tool import ArrowTool
from ryoma_ai.tool.python_tool import PythonTool


class ArrowAgent(WorkflowAgent):
    """Workflow agent that uses Arrow tools to work with pyarrow Tables."""

    description: str = (
        "An Arrow agent that can use Arrow tools to interact with Arrow Tables."
    )

    def __init__(self, model: str, model_parameters: Optional[Dict] = None):
        """Create the agent with a single ArrowTool registered.

        Args:
            model: Identifier of the underlying chat model.
            model_parameters: Optional provider-specific model parameters.
              (Annotation fixed: the default is None, so the type is Optional.)
        """
        super().__init__([ArrowTool()], model, model_parameters)

    def add_table(self, table: pa.Table) -> "ArrowAgent":
        """Register a pyarrow Table with the agent and return self for chaining.

        The table is announced to the model through a prompt (name + schema)
        and injected into every PythonTool's script context so generated
        scripts can reference it by name.
        """
        # NOTE(review): the name is derived from id(table), so it is only
        # stable while this exact table object is alive.
        table_id = f"table_{id(table)}"
        self.add_prompt(
            f"""
pyarrow table name: {table_id}
pyarrow table metadata: {table.schema}
"""
        )
        for tool in self.tools:
            if isinstance(tool, PythonTool):
                tool.update_script_context(script_context={table_id: table})
        return self
9 | """ 10 | model_parameters = config.parameters or {} 11 | 12 | # Allow config.api_key and config.endpoint to override parameters 13 | if config.api_key: 14 | model_parameters["api_key"] = config.api_key 15 | if config.endpoint: 16 | model_parameters["endpoint"] = config.endpoint 17 | 18 | # Load LangChain-compatible embedding model 19 | langchain_embedder = load_model_provider( 20 | config.model, 21 | "embedding", 22 | model_parameters=model_parameters, 23 | ) 24 | 25 | # Wrap it in Ryoma-compatible EmbeddingClient 26 | return LangchainEmbeddingClient(langchain_embedder) 27 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/tool/spark_tool.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from ryoma_ai.tool.python_tool import PythonTool 3 | 4 | 5 | class SparkTool(PythonTool): 6 | """Tool for running PySpark script.""" 7 | 8 | name: str = "pyspark_tool" 9 | description: str = """ 10 | Run a PySpark analysis script. 11 | The last line of the script should return a PySpark dataframe. 12 | If the script is not correct, an error message will be returned. 13 | """ 14 | 15 | 16 | class ConvertPandasToSparkTool(PythonTool): 17 | """Tool for converting a Pandas dataframe to a PySpark dataframe.""" 18 | 19 | name: str = "convert_pandas_to_pyspark" 20 | description: str = """ 21 | Convert a Pandas dataframe to a PySpark dataframe. 22 | If the Pandas dataframe is not correct, an error message will be returned. 
23 | """ 24 | 25 | def _run(self, dataframe: pd.DataFrame, **kwargs): 26 | """Convert the Pandas dataframe to a PySpark dataframe.""" 27 | return self.script_context["spark_session"].createDataFrame(dataframe) 28 | -------------------------------------------------------------------------------- /docs/source/tech-specs/tech_spec_v1.md: -------------------------------------------------------------------------------- 1 | 2 | # Ryoma tech spec v1 3 | 4 | This document describes the technical specifications of the project. 5 | 6 | ## Overview 7 | 8 | ### V1 Architecture 9 | 10 | ![Architecture](Architecture_v1.png) 11 | 12 | ## Components 13 | 14 | Each design component map to an interface, as well as a database table. 15 | 16 | ### A) Data Sources 17 | A data source contains the connector to the underlying db 18 | 19 | #### UI 20 | 21 | #### API 22 | 23 | 24 | #### Service 25 | 26 | 27 | ### B) Catalogs 28 | Data Catalogs contain the information (description/schema/data types) of data sources. Specifically, the catalogs include: 29 | 30 | 1. Name 31 | Name of the data source. 32 | 2. Type 33 | Database, Schema, or Table. 34 | 3. Description 35 | 4. Schema 36 | Schema of the table. 37 | 5. Data Types 38 | Each type of the column in the table. 39 | 6. Metadata 40 | Size of the data source. 41 | 42 | ### C) Vector store 43 | Vector store is used for storing the indexes of the data catalogs, as well as the user custom RAG content. 
"""Helpers for introspecting pydantic-style model classes used by the Lab UI."""

import inspect
from typing import TYPE_CHECKING, Any

if TYPE_CHECKING:
    # Imported for type hints only: avoids a hard runtime dependency on the
    # deprecated langchain_core.pydantic_v1 shim.
    from langchain_core.pydantic_v1 import BaseModel


def get_model_classes(model: Any) -> list:
    """Return (name, class) pairs for every class defined on *model*."""
    return inspect.getmembers(model, inspect.isclass)


def get_model_fields(model: "BaseModel", field_name: str) -> "BaseModel":
    """Return the default value of *field_name* (pydantic v1 ``__fields__`` API)."""
    return model.__fields__[field_name].default


def get_model_fields_as_dict(model: "BaseModel") -> dict:
    """Summarize a model's fields as ``{name: {name, required, description}}``.

    Previously this mixed APIs: it imported the pydantic-v1 BaseModel but
    iterated the v2-only ``model_fields`` mapping. It now works with either
    the v2 ``model_fields`` mapping or the v1 ``__fields__`` mapping.
    """
    fields = getattr(model, "model_fields", None)
    if fields is None:
        fields = model.__fields__
    d = {}
    for field, value in fields.items():
        # A field is required when its default is ... (Ellipsis).
        is_required = value.default is ... if hasattr(value, "default") else True
        description = (
            value.field_info.description
            if hasattr(value, "field_info") and value.field_info
            else None
        )

        d[field] = {
            "name": field,
            "required": is_required,
            "description": description,
        }
    return d
# Can be: chroma, pgvector, milvus, qdrant, faiss 24 | vector_store_url=None, # If None, will use defaults for the store type 25 | vector_store_collection="ryoma_vectors", 26 | vector_store_dimension=768, 27 | ) 28 | 29 | # Setup basic configuration for logging 30 | logging.basicConfig( 31 | level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s" 32 | ) 33 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_language_version: 2 | python: python3.10 3 | 4 | default_stages: [commit, push] 5 | 6 | repos: 7 | - repo: https://github.com/pre-commit/pre-commit-hooks 8 | rev: v2.5.0 9 | hooks: 10 | - id: check-yaml 11 | - id: end-of-file-fixer 12 | exclude: LICENSE 13 | 14 | - repo: local 15 | hooks: 16 | - id: pyupgrade 17 | name: pyupgrade 18 | entry: uv run pyupgrade --py38-plus 19 | types: [python] 20 | language: system 21 | 22 | - repo: local 23 | hooks: 24 | - id: isort 25 | name: isort 26 | entry: uv run isort --settings-path pyproject.toml 27 | types: [python] 28 | language: system 29 | 30 | - repo: local 31 | hooks: 32 | - id: black 33 | name: black 34 | entry: uv run black --config pyproject.toml 35 | types: [python] 36 | language: system 37 | 38 | - repo: local 39 | hooks: 40 | - id: lint 41 | name: lint 42 | entry: make lint 43 | types: [python] 44 | language: system 45 | pass_filenames: false 46 | -------------------------------------------------------------------------------- /docs/source/roadmap/roadmap.md: -------------------------------------------------------------------------------- 1 | 2 | # Roadmap 3 | 4 | The list below contains the functionality that contributors are planning to develop for Ryoma. 5 | 6 | * We welcome contribution to all items in the roadmap! 
7 | 8 | * **Data Sources** 9 | * [x] [Snowflake source](https://docs.ryoma.dev/reference/data-sources/snowflake) 10 | * [ ] [BigQuery source](https://docs.ryoma.dev/reference/data-sources/bigquery) 11 | * [x] [Parquet file source](https://docs.ryoma.dev/reference/data-sources/file) 12 | * [x] [Postgres (contrib plugin)](https://docs.ryoma.dev/reference/data-sources/postgres) 13 | 14 | * **Agents** 15 | * [x] [Python Agent](https://docs.ryoma.dev/reference/agents/python-agent) 16 | * [ ] [Java Agent](https://docs.ryoma.dev/reference/agents/java-agent) 17 | * [ ] [Go Agent](https://docs.ryoma.dev/reference/agents/go-agent) 18 | 19 | * **Tools** 20 | * [x] [Python tool](https://docs.ryoma.dev/reference/tools/python) 21 | * [x] [Pandas tool](https://docs.ryoma.dev/reference/tools/pandas) 22 | * [x] [Sql tool](https://docs.ryoma.dev/reference/tools/sql) 23 | * [ ] [Spark tool](https://docs.ryoma.dev/reference/tools/spark) -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | strategy: 9 | matrix: 10 | python-version: ["3.10", "3.11", "3.12"] 11 | 12 | defaults: 13 | run: 14 | working-directory: ./ 15 | 16 | steps: 17 | - uses: actions/checkout@v4 18 | - name: Set up Python ${{ matrix.python-version }} 19 | uses: actions/setup-python@v5.0.0 20 | with: 21 | python-version: ${{ matrix.python-version }} 22 | 23 | - name: Install uv 24 | run: make uv-download 25 | 26 | - name: Set up cache 27 | uses: actions/cache@v4.2.2 28 | with: 29 | path: .venv 30 | key: venv-${{ matrix.python-version }}-${{ hashFiles('pyproject.toml') }}-${{ hashFiles('poetry.lock') }} 31 | - name: Install dependencies 32 | run: | 33 | make install 34 | - name: Run style checks 35 | run: | 36 | make check-codestyle 37 | 38 | - name: Run unit tests 39 | run: | 40 | 
"""
Configuration for store backends.
"""

from typing import Any, Dict, Optional

from pydantic import BaseModel, Field


class StoreConfig(BaseModel):
    """Configuration for store backend."""

    type: str = Field(
        default="memory", description="Type of store: memory, postgres, redis"
    )

    connection_string: Optional[str] = Field(
        default=None, description="Connection string for database stores"
    )

    options: Dict[str, Any] = Field(
        default_factory=dict, description="Additional store-specific options"
    )

    def to_factory_params(self) -> Dict[str, Any]:
        """Convert config to parameters for StoreFactory.

        ``store_config`` is only included when there is something to pass:
        a connection string (merged with options) or bare options.
        """
        params: Dict[str, Any] = {"store_type": self.type}
        if self.connection_string:
            merged = {"connection_string": self.connection_string}
            merged.update(self.options)
            params["store_config"] = merged
        elif self.options:
            params["store_config"] = self.options
        return params
from typing import Optional

from ryoma_lab.models.embedding import Embedding
from ryoma_lab.states.base import BaseState


class AIState(BaseState):
    """UI state for the AI settings tab (embedding model, dimension, API key)."""

    tab_value: str = "agent"

    embedding: Optional[Embedding] = None

    selected_model: str = "gpt4all:all-MiniLM-L6-v2-f16"
    dimension: int = 512
    api_key: Optional[str] = ""

    def set_model(self, model: str):
        """Select an embedding model and rebuild the Embedding handle."""
        self.selected_model = model
        self.load_embedding()

    def set_dimension(self, dimension: str):
        """Parse the dimension input, falling back to 512 on empty or invalid values."""
        parsed = 512
        if dimension:
            try:
                parsed = int(dimension)
            except ValueError:
                parsed = 512
        self.dimension = parsed
        self.load_embedding()

    def set_api_key(self, api_key: str):
        """Store the API key and rebuild the Embedding handle."""
        self.api_key = api_key
        self.load_embedding()

    def load_embedding(self):
        """Materialize an Embedding object from the currently selected settings."""
        self.embedding = Embedding(
            model=self.selected_model,
            model_parameters={"api_key": self.api_key, "dimension": self.dimension},
        )

    def on_load(self):
        """Page-load hook: ensure an embedding exists for the current settings."""
        self.load_embedding()
from typing import Optional

from ryoma_ai.datasource.base import DataSource
from ryoma_lab.services.kernel.base import BaseKernel
from ryoma_lab.services.kernel.pythonkernel import PythonKernel
from ryoma_lab.services.kernel.sqlkernel import SqlKernel


class KernelService:
    """Factory that hands out execution kernels bound to an optional datasource."""

    def __init__(self, datasource: Optional[DataSource] = None):
        self.datasource = datasource

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Nothing to release; the context-manager form is kept for call-site symmetry.
        pass

    def create_kernel(self, kernel_type: str, **kwargs) -> BaseKernel:
        """Build a kernel of the given type ("sql" or "python").

        Raises:
            ValueError: if *kernel_type* is not supported.
        """
        factories = {"sql": SqlKernel, "python": PythonKernel}
        if kernel_type not in factories:
            raise ValueError(f"Unsupported kernel type: {kernel_type}")
        return factories[kernel_type](datasource=self.datasource, **kwargs)

    def set_datasource(self, datasource: DataSource):
        self.datasource = datasource

    def get_datasource(self) -> Optional[DataSource]:
        return self.datasource
class AgentProvider(Enum):
    """Registry of built-in agent types, keyed by the public agent name."""

    base = ChatAgent
    sql = SqlAgent
    pandas = PandasAgent
    pyarrow = ArrowAgent
    pyspark = SparkAgent
    python = PythonAgent
    embedding = EmbeddingAgent


def get_builtin_agents():
    """Return all built-in agent providers as a list of enum members."""
    return list(AgentProvider)


class AgentFactory:
    @staticmethod
    def create_agent(
        agent_type: str, *args, **kwargs
    ) -> Union[EmbeddingAgent, ChatAgent, WorkflowAgent]:
        """Instantiate the agent class registered under ``agent_type``.

        Falls back to ``ChatAgent`` when ``agent_type`` is empty or not a
        registered provider name. Positional and keyword arguments are
        forwarded to the agent constructor.
        """
        # Use __members__ rather than hasattr(): hasattr(AgentProvider, name)
        # is also True for ordinary class attributes such as "mro", and
        # AgentProvider[name] would then raise a KeyError instead of
        # falling back to ChatAgent.
        if agent_type and agent_type in AgentProvider.__members__:
            agent_class = AgentProvider[agent_type].value
        else:
            agent_class = ChatAgent
        return agent_class(*args, **kwargs)
@dataclass
class DataSourceRegistration:
    """Data source registration information.

    Attributes:
        id: Unique identifier of the registration.
        name: Human-readable data source name.
        type: Data source type (e.g. "postgres", "mysql").
        config: Connection/configuration options for the data source.
        created_at: Timestamp of when the registration was created.
        updated_at: Timestamp of the last update.
        is_active: Whether the data source is currently enabled.
        description: Optional free-form description.
        tags: Optional list of labels for grouping/filtering.
    """

    id: str
    name: str
    type: str
    config: Dict[str, Any]
    created_at: datetime
    updated_at: datetime
    is_active: bool = True
    description: Optional[str] = None
    tags: Optional[List[str]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a JSON-friendly dictionary for storage.

        Datetimes are serialized to ISO-8601 strings.
        """
        data = asdict(self)
        data["created_at"] = self.created_at.isoformat()
        data["updated_at"] = self.updated_at.isoformat()
        return data

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "DataSourceRegistration":
        """Create an instance from a dictionary loaded from storage.

        The input mapping is not mutated: a shallow copy is taken before the
        ISO timestamp strings are parsed back into datetimes (the original
        implementation overwrote the caller's dict in place).
        """
        data = dict(data)
        data["created_at"] = datetime.fromisoformat(data["created_at"])
        data["updated_at"] = datetime.fromisoformat(data["updated_at"])
        return cls(**data)
| 7 | 8 | 9 | ## Type of Change 10 | 11 | 12 | 13 | - [ ] 📚 Examples / docs / tutorials / dependencies update 14 | - [ ] 🔧 Bug fix (non-breaking change which fixes an issue) 15 | - [ ] 🥂 Improvement (non-breaking change which improves an existing feature) 16 | - [ ] 🚀 New feature (non-breaking change which adds functionality) 17 | - [ ] 💥 Breaking change (fix or feature that would cause existing functionality to change) 18 | - [ ] 🔐 Security fix 19 | 20 | ## Checklist 21 | 22 | 23 | 24 | - [ ] I've read the [`CODE_OF_CONDUCT.md`](https://github.com/ryoma/ryoma/blob/master/CODE_OF_CONDUCT.md) document. 25 | - [ ] I've read the [`CONTRIBUTING.md`](https://github.com/ryoma/ryoma/blob/master/CONTRIBUTING.md) guide. 26 | - [ ] I've updated the code style using `make codestyle`. 27 | - [ ] I've written tests for all new methods and classes that I created. 28 | - [ ] I've written the docstring in Google format for all the methods and classes that I used. 29 | -------------------------------------------------------------------------------- /docs/source/reference/agent/pandas.md: -------------------------------------------------------------------------------- 1 | # Pandas Agent 2 | 3 | The Pandas agent is an Ryoma agent that runs on the Pandas library. 4 | The Pandas agent can be used to ask questions in natural language and interact with Pandas DataFrames. 5 | 6 | ## Example 7 | 8 | pass Data Source to Pandas Agent and return result as a dataframe. 
Pass a data source to the Pandas agent and stream the result:

```python
from ryoma_ai.agent.pandas_agent import PandasAgent
from ryoma_ai.datasource.sqlite import SqliteDataSource
from ryoma_ai.prompt.base import BasicContextPromptTemplate

datasource = SqliteDataSource("sqlite:///data.db")
pandas_agent = (
    PandasAgent("gpt-3.5-turbo")
    .set_context_prompt(BasicContextPromptTemplate)
    .add_datasource(datasource)
)
pandas_agent.stream("Get the top 10 customers by purchase amount")
```

Add a DataFrame to the Pandas agent and ask it to analyze the data:

```python
from ryoma_ai.agent.pandas_agent import PandasAgent
import pandas as pd

df = pd.DataFrame({
    'customer_id': [1, 2, 3, 4, 5],
    'purchase_amount': [100, 200, 300, 400, 500]
})
pandas_agent = PandasAgent("gpt-3.5-turbo").add_dataframe(df)

pandas_agent.stream("I want to get the top customers who make the most purchases")
```
class MetadataSummarizationAgent(ChatAgent):
    """Chat agent that turns column/schema profiling stats into natural-language descriptions."""

    def summarize_column(self, column_name: str, profile: Dict[str, Any]) -> str:
        """Describe what a single column likely represents.

        Args:
            column_name: Name of the column being summarized.
            profile: Profiling stats for the column. Expected keys include
                "type", "null_ratio" and "distinct_count"; numeric columns may
                add "min"/"max"/"mean" and string columns may add
                "sample_values"/"min_length"/"max_length". All keys are
                treated as optional.

        Returns:
            The model-generated description of the column.
        """
        # Format null_ratio only when present: f"{None:.2f}" raises TypeError.
        null_ratio = profile.get("null_ratio")
        null_ratio_text = f"{null_ratio:.2f}" if null_ratio is not None else "unknown"
        prompt = (
            f"Column Name: {column_name}\n"
            f"Type: {profile.get('type')}\n"
            f"Null Ratio: {null_ratio_text}\n"
            f"Distinct Count: {profile.get('distinct_count')}\n"
        )
        if "min" in profile:
            # Assumes "max" and "mean" accompany "min" for numeric columns
            # (matches the profiler's output) — TODO confirm against profiler.
            prompt += f"Min: {profile['min']}, Max: {profile['max']}, Mean: {profile['mean']:.2f}\n"
        if "sample_values" in profile:
            prompt += f"Sample Values: {profile['sample_values']}\n"
        if "min_length" in profile:
            # Guarded separately from sample_values: a profile may carry
            # sample values without length stats, which previously raised
            # a KeyError here.
            prompt += f"Min Length: {profile['min_length']}, Max Length: {profile['max_length']}\n"

        prompt += "\nDescribe what this column likely represents."

        return self.chat(prompt).content

    def summarize_schema(
        self, schema_profile: Dict[str, Dict[str, Any]]
    ) -> Dict[str, str]:
        """Summarize every column in a schema profile.

        Returns a mapping of column name -> generated description.
        """
        return {
            column: self.summarize_column(column, profile)
            for column, profile in schema_profile.items()
        }
6 | 7 | ## 🎯 Quick Navigation 8 | 9 | | 🤖 Component | 📝 Description | 🔗 Link | 10 | |--------------|----------------|---------| 11 | | **Core API** | Base classes, stores, and configuration | [API Reference →](api/index.md) | 12 | | **Agents** | AI-powered data analysis agents | [Agents →](agent/index.md) | 13 | | **Data Sources** | Database and file connectors | [Data Sources →](data-sources/index.md) | 14 | | **Tools** | Specialized analysis tools | [Tools →](tool/index.md) | 15 | | **Models** | LLM integrations and configurations | [Models →](models/index.md) | 16 | | **Profiling** | Database metadata extraction | [Profiling →](profiling/index.md) | 17 | 18 | ## 🚀 Latest Features 19 | 20 | - **Enhanced SQL Agent** - Multi-step reasoning with safety validation 21 | - **ReFoRCE Agent** - State-of-the-art self-refinement capabilities 22 | - **Database Profiling** - Comprehensive metadata extraction with Ibis 23 | - **Advanced Tools** - Query validation, optimization, and explanation 24 | - **Safety Framework** - Configurable validation and security policies 25 | 26 | ```{toctree} 27 | :maxdepth: 2 28 | 29 | api/index 30 | agent/index 31 | data-sources/index 32 | tool/index 33 | models/index 34 | profiling/index 35 | ``` 36 | -------------------------------------------------------------------------------- /scripts/langchain_test.py: -------------------------------------------------------------------------------- 1 | from langchain.chains import create_sql_query_chain 2 | from langchain_community.tools import QuerySQLDataBaseTool 3 | from langchain_community.utilities import SQLDatabase 4 | from langchain_core.tools import tool 5 | from langchain_openai import ChatOpenAI 6 | 7 | 8 | @tool 9 | def multiply(a: int, b: int) -> int: 10 | """Multiply two integers together. 
11 | 12 | Args: 13 | a: First integer 14 | b: Second integer 15 | """ 16 | return a * b 17 | 18 | 19 | # print(json.dumps(convert_to_openai_tool(multiply), indent=2)) 20 | # 21 | # llm_with_tool = llm.bind( 22 | # tools=[convert_to_openai_tool(multiply)], 23 | # tool_choice={"type": "function", "function": {"name": "multiply"}}, 24 | # ) 25 | # print(llm_with_tool.invoke( 26 | # "what's five times four" 27 | # )) 28 | 29 | 30 | db = SQLDatabase.from_uri("") 31 | # print(db.dialect) 32 | # print(db.get_usable_table_names()) 33 | # db.run("SELECT * FROM orders LIMIT 10;") 34 | 35 | query_tool = QuerySQLDataBaseTool(db=db) 36 | llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0).bind_tools( 37 | [multiply, query_tool] 38 | ) 39 | chain = create_sql_query_chain(llm, db) 40 | response = chain.invoke( 41 | {"question": "the top 10 customers buying the most number of orders"} 42 | ) 43 | print(response) 44 | 45 | print(llm.invoke("the top 10 customers buying the most number of orders")) 46 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/states/tool.py: -------------------------------------------------------------------------------- 1 | import reflex as rx 2 | from ryoma_ai import tool 3 | from ryoma_lab.models.tool import Tool, ToolArg 4 | from ryoma_lab.states.ai import AIState 5 | from ryoma_lab.states.utils import ( 6 | get_model_classes, 7 | get_model_fields, 8 | get_model_fields_as_dict, 9 | ) 10 | 11 | 12 | class ToolState(AIState): 13 | tools: list[Tool] 14 | 15 | @rx.var 16 | def tool_names(self) -> list[str]: 17 | return [t.name for t in self.tools] 18 | 19 | def load_tools(self): 20 | self.tools = [] 21 | for t in get_model_classes(tool): 22 | name, cls = t 23 | description = get_model_fields(cls, "description") 24 | args_schema = get_model_fields(cls, "args_schema") 25 | args = get_model_fields_as_dict(args_schema) 26 | self.tools.append( 27 | Tool( 28 | name=name, 29 | description=description, 
30 | args=[ 31 | ToolArg( 32 | name=arg["name"], 33 | required=arg["required"], 34 | description=arg["description"], 35 | ) 36 | for arg in args.values() 37 | ], 38 | ) 39 | ) 40 | 41 | def on_load(self): 42 | self.load_tools() 43 | -------------------------------------------------------------------------------- /docs/source/architecture/enhanced-sql-agent-workflow.mmd: -------------------------------------------------------------------------------- 1 | --- 2 | title: Enhanced SQL Agent Workflow 3 | --- 4 | graph TD 5 | %% Main Workflow 6 | A[🤔 User Question] --> B[🔍 Analyze Question] 7 | B --> C[🔗 Schema Linking] 8 | C --> D[📋 Query Planning] 9 | D --> E[⚡ Generate SQL] 10 | E --> F[🛡️ Validate Safety] 11 | 12 | %% Safety Check Branch 13 | F --> G{🔒 Safety Check} 14 | G -->|✅ Pass| H[▶️ Execute Query] 15 | G -->|❌ Fail| M[📝 Format Response] 16 | 17 | %% Execution Branch 18 | H --> I{📊 Execution Result} 19 | I -->|✅ Success| M[📝 Format Response] 20 | I -->|❌ Error| J[🛠️ Handle Error] 21 | I -->|🔄 Retry Needed| E 22 | 23 | %% Error Handling Branch 24 | J --> K{🤔 Should Retry?} 25 | K -->|🔄 Yes, Retry < Max| E 26 | K -->|🛑 No, Give Up| M 27 | 28 | %% Final Output 29 | M --> N[📝 Final Answer] 30 | 31 | %% Styling 32 | classDef startEnd fill:#e3f2fd,stroke:#1976d2,stroke-width:3px,color:#000 33 | classDef process fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px,color:#000 34 | classDef decision fill:#fff8e1,stroke:#f57c00,stroke-width:2px,color:#000 35 | classDef error fill:#ffebee,stroke:#d32f2f,stroke-width:2px,color:#000 36 | classDef success fill:#e8f5e8,stroke:#388e3c,stroke-width:2px,color:#000 37 | 38 | class A,N startEnd 39 | class B,C,D,E,H,M process 40 | class G,I,K decision 41 | class J error 42 | class F success 43 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/components/upload.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | 3 | 
def upload_render(files: list[str], handle_upload: Any):
    """Render a drag-and-drop file upload area plus a grid of uploaded file names.

    Args:
        files: Names of already-uploaded files to display below the drop zone.
        handle_upload: Event handler factory invoked with the pending uploads
            (``rx.upload_files``) when files are dropped or selected.
    """
    return rx.vstack(
        rx.upload(
            rx.vstack(
                rx.button(
                    "Select File",
                ),
                rx.text("Drag and drop files here or click to select files"),
                align="center",
            ),
            # Must match the upload_id passed to rx.upload_files below.
            id="upload2",
            multiple=True,
            # Whitelist of accepted MIME types and their file extensions.
            accept={
                "application/pdf": [".pdf"],
                "image/png": [".png"],
                "image/jpeg": [".jpg", ".jpeg"],
                "image/gif": [".gif"],
                "image/webp": [".webp"],
                "text/html": [".html", ".htm"],
            },
            max_files=5,
            disabled=False,
            on_drop=handle_upload(rx.upload_files(upload_id="upload2")),
            border=styles.border,
            padding="5em",
        ),
        # Two-column grid listing each uploaded file name.
        rx.grid(
            rx.foreach(
                files,
                lambda file: rx.vstack(
                    rx.text(file),
                ),
            ),
            columns="2",
            spacing="1",
        ),
        padding="5em",
    )
return data_source 32 | 33 | 34 | def test_execute_query(mock_sql_data_source): 35 | with patch("ryoma_ai.datasource.sql.SqlDataSource.query") as mock_execute: 36 | mock_execute.return_value = "success" 37 | results = mock_sql_data_source.query("SELECT * FROM table") 38 | assert results == "success" 39 | 40 | 41 | def test_sql_datasource_field_exists(mock_sql_data_source): 42 | assert hasattr(mock_sql_data_source, "database") 43 | assert hasattr(mock_sql_data_source, "db_schema") 44 | -------------------------------------------------------------------------------- /assets/github.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/services/kernel/base.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import traceback 3 | from abc import abstractmethod 4 | from typing import Any, Dict, Optional 5 | 6 | from ryoma_ai.datasource.base import DataSource 7 | 8 | 9 | class BaseKernel: 10 | datasource: DataSource 11 | 12 | def __init__(self, datasource: Optional[DataSource] = None, **kwargs): 13 | self.datasource = datasource 14 | 15 | async def execute_code(self, code: str) -> Dict[str, Any]: 16 | loop = asyncio.get_event_loop() 17 | return await loop.run_in_executor(None, self.execute, code) 18 | 19 | @abstractmethod 20 | def execute(self, code: str) -> Dict[str, Any]: 21 | pass 22 | 23 | def _create_error_response(self, error: Exception) -> Dict[str, Any]: 24 | return { 25 | "output_type": "error", 26 | "ename": type(error).__name__, 27 | "evalue": str(error), 28 | "traceback": self._format_traceback(error), 29 | } 30 | 31 | def _create_success_response(self, result: Any) -> Dict[str, Any]: 32 | return { 33 | "output_type": "execute_result", 34 | "data": {"text/plain": str(result)} if result is not None else None, 35 | } 36 | 
class ReactFlowLib(rx.Component):
    """Base class for components wrapping the `reactflow` React library.

    Injects the reactflow stylesheet so the flow canvas renders correctly.
    """

    library = "reactflow"

    def _get_custom_code(self) -> str:
        return """import 'reactflow/dist/style.css';
"""


class ReactFlow(ReactFlowLib):
    """The main React Flow canvas component."""

    tag = "ReactFlow"

    # Graph content: lists of node/edge descriptors in reactflow's format.
    nodes: rx.Var[List[Dict[str, Any]]]

    edges: rx.Var[List[Dict[str, Any]]]

    # View/behavior toggles.
    fit_view: rx.Var[bool]

    nodes_draggable: rx.Var[bool]

    nodes_connectable: rx.Var[bool]

    nodes_focusable: rx.Var[bool]

    # Event handlers receive the raw reactflow event payload.
    on_nodes_change: rx.EventHandler[lambda e0: [e0]]

    on_edges_change: rx.EventHandler[lambda e0: [e0]]

    on_connect: rx.EventHandler[lambda e0: [e0]]

    # NOTE: the _get_custom_code override that duplicated the base-class
    # implementation verbatim was removed; the inherited version is used.


class Background(ReactFlowLib):
    """Background grid/dots layer for the flow canvas."""

    tag = "Background"

    color: rx.Var[str]

    gap: rx.Var[int]

    size: rx.Var[int]

    variant: rx.Var[str]


class Controls(ReactFlowLib):
    """Zoom/fit/lock control panel."""

    tag = "Controls"


class ApplyNodeChanges(ReactFlowLib):
    tag = "applyNodeChanges"


react_flow = ReactFlow.create
background = Background.create
controls = Controls.create
apply_node_changes = ApplyNodeChanges.create
-------------------------------------------------------------------------------- 1 | import openai 2 | import openai_responses 3 | import pytest 4 | from openai_responses import OpenAIMock 5 | from ryoma_ai.agent.chat_agent import ChatAgent 6 | 7 | from tests.unit_tests.test_utils import ( 8 | create_chat_completion_response_stream, 9 | mock_chat_response, 10 | ) 11 | 12 | 13 | @pytest.fixture(autouse=True) 14 | def mock_openai_api_key(monkeypatch): 15 | monkeypatch.setenv("OPENAI_API_KEY", "foo") 16 | 17 | 18 | @openai_responses.mock() 19 | def test_create_chat_completion_stream(openai_mock: OpenAIMock): 20 | openai_mock.chat.completions.create.response = ( 21 | create_chat_completion_response_stream 22 | ) 23 | 24 | client = openai.Client(api_key="sk-fake123") 25 | completion = client.chat.completions.create( 26 | model="gpt-4o", 27 | messages=[ 28 | {"role": "system", "content": "You are a helpful assistant."}, 29 | {"role": "user", "content": "Hello!"}, 30 | ], 31 | stream=True, 32 | ) 33 | 34 | received_chunks = 0 35 | 36 | for chunk in completion: 37 | received_chunks += 1 38 | assert chunk.id 39 | 40 | assert received_chunks == 3 41 | 42 | 43 | @pytest.fixture 44 | def agent(): 45 | return ChatAgent("gpt-3.5-turbo") 46 | 47 | 48 | @openai_responses.mock() 49 | def test_chat(agent, openai_mock: OpenAIMock): 50 | openai_mock.chat.completions.create.response = mock_chat_response("Hello, world!") 51 | chat_response = agent.invoke("Hello, world!", display=False) 52 | assert chat_response.content == "Hello, world!" 
def model_config_render() -> rx.Component:
    """Render API-key and dimension inputs for the selected embedding model.

    NOTE(review): this appears to duplicate the API Key / Dimension rows of
    embedding_component below — confirm it is still referenced before removing.
    """
    return rx.vstack(
        rx.hstack(
            rx.text("API Key"),
            rx.input(
                value=AIState.api_key,
                on_change=AIState.set_api_key,
            ),
        ),
        rx.hstack(
            rx.text("Dimension"),
            rx.input(
                value=AIState.dimension,
                on_change=AIState.set_dimension,
            ),
        ),
        width="100%",
        spacing="4",
    )


def embedding_component() -> rx.Component:
    """Render the embedding settings panel: model selector, API key, dimension."""
    return rx.vstack(
        rx.hstack(
            rx.text("Model", width="100px"),
            embedding_model_selector(
                AIState.selected_model,
                AIState.set_model,
            ),
        ),
        rx.hstack(
            rx.text("API Key", width="100px"),
            rx.input(
                value=AIState.api_key,
                on_change=AIState.set_api_key,
                # Masked input so the key is not echoed on screen.
                type="password",
            ),
        ),
        rx.hstack(
            rx.text("Dimension", width="100px"),
            rx.input(
                value=AIState.dimension,
                on_change=AIState.set_dimension,
            ),
        ),
        width="100%",
        padding_x="2em",
    )
"execute_result", "dataframe", "error"] = "dataframe" 16 | dataframe: pd.DataFrame 17 | 18 | 19 | class StreamOutput(CellOutput): 20 | output_type: Literal["stream", "execute_result", "dataframe", "error"] = "stream" 21 | text: str 22 | 23 | 24 | class ExecuteResultOutput(CellOutput): 25 | output_type: Literal["stream", "execute_result", "dataframe", "error"] = ( 26 | "execute_result" 27 | ) 28 | execute_result: Union[dict[str, Any], None] = None 29 | 30 | 31 | class ErrorOutput(CellOutput): 32 | output_type: Literal["stream", "execute_result", "dataframe", "error"] = "error" 33 | 34 | 35 | class UnknownOutput(ErrorOutput): 36 | text: str = "Unknown output type" 37 | 38 | 39 | class Cell(rx.Base): 40 | cell_type: str = "code" 41 | content: str = "" 42 | output: List[ 43 | Union[ 44 | StreamOutput, 45 | ExecuteResultOutput, 46 | DataframeOutput, 47 | ErrorOutput, 48 | UnknownOutput, 49 | ] 50 | ] = [] 51 | tool_id: Optional[str] = None 52 | execute_function: Optional[Callable[[str, str], Coroutine[Any, Any, None]]] = None 53 | update_function: Optional[Callable[[str, str], None]] = None 54 | -------------------------------------------------------------------------------- /packages/ryoma_ai/README.md: -------------------------------------------------------------------------------- 1 | # Ryoma 2 | 3 | Ryoma lib is the core component of the project which includes: 4 | - **Data Sources** that can be used to fetch data from different sources 5 | - **Agents** that can be used to process data with AI models 6 | - **Tools** that can be used by agent to process data 7 | 8 | ## Installation 9 | 10 | ### Basic Installation 11 | ```bash 12 | pip install ryoma_ai 13 | ``` 14 | 15 | ### Installing with Optional Dependencies 16 | 17 | Ryoma AI uses lazy imports for datasource dependencies, so you only need to install the dependencies for the datasources you plan to use: 18 | 19 | ```bash 20 | # For PostgreSQL support 21 | pip install ryoma_ai[postgres] 22 | 23 | # For MySQL 
support 24 | pip install ryoma_ai[mysql] 25 | 26 | # For Snowflake support 27 | pip install ryoma_ai[snowflake] 28 | 29 | # For BigQuery support 30 | pip install ryoma_ai[bigquery] 31 | 32 | # For DuckDB support 33 | pip install ryoma_ai[duckdb] 34 | 35 | # For DynamoDB support 36 | pip install ryoma_ai[dynamodb] 37 | 38 | # For Apache Iceberg support 39 | pip install ryoma_ai[iceberg] 40 | 41 | # For PySpark support 42 | pip install ryoma_ai[pyspark] 43 | 44 | # Multiple datasources 45 | pip install ryoma_ai[postgres,mysql,duckdb] 46 | ``` 47 | 48 | ## Usage 49 | 50 | ```python 51 | from ryoma_ai.datasource.postgres import PostgresDataSource 52 | from ryoma_ai.agent.sql import SqlAgent 53 | 54 | datasource = PostgresDataSource("postgresql://user:password@localhost/db") 55 | sql_agent = SqlAgent("gpt-3.5-turbo").add_datasource(datasource) 56 | sql_agent.stream("Get the top 10 rows from the data source") 57 | ``` 58 | 59 | ## Documentation 60 | Visit the [documentation](https://project-ryoma.github.io/ryoma/) for more information. 61 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/tool/python_tool.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Any, Dict, Sequence, Type, Union 3 | 4 | from IPython import get_ipython 5 | from IPython.core.interactiveshell import ExecutionResult, InteractiveShell 6 | from langchain_core.tools import BaseTool 7 | from pydantic import BaseModel, Field 8 | 9 | log = logging.getLogger(__name__) 10 | 11 | 12 | class PythonInput(BaseModel): 13 | script: str = Field(description="python script") 14 | 15 | 16 | class PythonTool(BaseTool): 17 | """Tool for running python script in an IPython environment.""" 18 | 19 | name: str = "run_ipython_script_tool" 20 | description: str = """ 21 | Execute a python script in an IPython environment and return the result of the last expression. 
def get_embedding_client(
    selected_model: str, model_parameters: Optional[dict[str, str]] = None
) -> Embeddings:
    """Load a LangChain Embeddings instance for the given model identifier.

    Args:
        selected_model: Provider/model identifier understood by
            load_model_provider.
        model_parameters: Optional provider-specific parameters.
    """
    logging.info(f"Creating embedding client for {selected_model}")
    return load_model_provider(
        selected_model,
        "embedding",
        model_parameters=model_parameters,
    )


class EmbeddingClient(ABC):
    """Minimal embedding interface that decouples callers from LangChain."""

    @abstractmethod
    def embed(self, text: str) -> List[float]:
        """Embed a single text into a vector."""
        pass

    def embed_batch(self, texts: List[str]) -> List[List[float]]:
        """Embed many texts; default implementation embeds one at a time.

        Subclasses may override with a provider-native batch call.
        """
        return [self.embed(t) for t in texts]

    @abstractmethod
    def langchain(self) -> Embeddings:
        """Return the underlying LangChain Embeddings object."""
        pass
class BigqueryDataSource(SqlDataSource):
    """SQL data source backed by Google BigQuery (via the Ibis backend)."""

    project_id: str = Field(..., description="Bigquery current_store ID")
    dataset_id: str = Field(..., description="Bigquery dataset ID")
    credentials: Optional[str] = Field(None, description="Path to the credentials file")

    def _connect(self, **kwargs) -> BaseBackend:
        """Open an Ibis BigQuery connection for this project/dataset.

        Extra keyword arguments are forwarded to ibis.bigquery.connect.
        """
        return ibis.bigquery.connect(
            project_id=self.project_id,
            dataset_id=self.dataset_id,
            credentials=self.credentials,
            **kwargs,
        )

    def crawl_catalog(self, loader: Loader, where_clause_suffix: Optional[str] = ""):
        """Crawl BigQuery table metadata into the given databuilder loader.

        Args:
            loader: Databuilder loader that receives the extracted metadata.
            where_clause_suffix: Accepted for interface parity with other
                data sources; not used by the BigQuery extractor here.
        """
        # Imported lazily so the databuilder BigQuery extras are only
        # required when catalog crawling is actually used.
        from databuilder.extractor.bigquery_metadata_extractor import (
            BigQueryMetadataExtractor,
        )

        logging.info("Crawling data catalog from Bigquery")
        # The extractor config must MAP the project-id config key to the
        # actual project id. The original code built a one-element set
        # literal (a key expression with no value), which ConfigFactory
        # cannot interpret as configuration.
        job_config = ConfigFactory.from_dict(
            {
                "extractor.bigquery_table_metadata.{}".format(
                    BigQueryMetadataExtractor.PROJECT_ID_KEY
                ): self.project_id
            }
        )
        job = DefaultJob(
            conf=job_config,
            task=DefaultTask(extractor=BigQueryMetadataExtractor(), loader=loader),
        )

        job.launch()
actions/deploy-pages@d6db90164ac5ed86f2b6aed7e0febac5b3c0c03e # v4.0.5 -------------------------------------------------------------------------------- /example_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "model": "gpt-4o", 3 | "mode": "enhanced", 4 | 5 | "meta_store": { 6 | "type": "postgres", 7 | "connection_string": "postgresql://user:password@localhost:5432/metadata_db", 8 | "options": { 9 | "timeout": 30, 10 | "pool_size": 10 11 | } 12 | }, 13 | 14 | "vector_store": { 15 | "type": "pgvector", 16 | "collection_name": "ryoma_vectors", 17 | "dimension": 768, 18 | "distance_metric": "cosine", 19 | "extra_configs": { 20 | "connection_string": "postgresql://postgres:postgres@localhost:5432/vectordb", 21 | "host": "localhost", 22 | "port": 5432, 23 | "database": "vectordb", 24 | "user": "postgres", 25 | "password": "postgres", 26 | "distance_strategy": "cosine" 27 | } 28 | }, 29 | 30 | "datasources": [ 31 | { 32 | "name": "default", 33 | "type": "postgres", 34 | "host": "localhost", 35 | "port": 5432, 36 | "database": "postgres", 37 | "user": "postgres", 38 | "password": "password" 39 | }, 40 | { 41 | "name": "analytics_db", 42 | "type": "mysql", 43 | "host": "analytics.company.com", 44 | "port": 3306, 45 | "database": "analytics", 46 | "user": "analytics_user", 47 | "password": "analytics_password" 48 | }, 49 | { 50 | "name": "warehouse", 51 | "type": "snowflake", 52 | "account": "your_account", 53 | "user": "warehouse_user", 54 | "password": "warehouse_password", 55 | "database": "warehouse", 56 | "schema": "public", 57 | "warehouse": "compute_wh" 58 | } 59 | ], 60 | 61 | "agent": { 62 | "auto_approve_all": false, 63 | "retry_count": 3, 64 | "timeout_seconds": 300 65 | } 66 | } -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/datasource/base.py: -------------------------------------------------------------------------------- 1 | from abc import 
class DataSource(BaseModel, ABC):
    """Abstract base class for all Ryoma data sources.

    Concrete subclasses (SQL, file, NoSQL, ...) implement catalog retrieval,
    catalog crawling, prompt rendering and table profiling. The pydantic
    config allows arbitrary and extra attributes so subclasses can carry
    backend-specific state.
    """

    model_config = {
        "arbitrary_types_allowed": True,
        "extra": "allow",
    }

    def __init__(self, type: str, **kwargs: Any):
        """Initialize the data source.

        Args:
            type: Short identifier of the data source kind (e.g. ``"file"``).
            **kwargs: Extra attributes forwarded to the pydantic constructor.
        """
        super().__init__(**kwargs)
        # NOTE(review): ``type`` is not a declared field and is assigned after
        # pydantic initialization; this relies on ``extra="allow"`` permitting
        # attribute assignment for undeclared fields — confirm this holds for
        # the pydantic version in use.
        self.type = type

    @abstractmethod
    def get_catalog(self, **kwargs: Dict[str, Any]) -> Catalog:
        """Return the catalog (schemas/tables/columns) of this data source."""
        raise NotImplementedError("get_catalog is not implemented for this data source")

    @abstractmethod
    def crawl_catalog(
        self, loader: Loader, **kwargs: Dict[str, Any]
    ) -> Optional[Catalog]:
        """Crawl catalog metadata and feed it into the given databuilder loader."""
        raise NotImplementedError(
            "crawl_catalog is not implemented for this data source."
        )

    @abstractmethod
    def prompt(self, schema: Optional[str] = None, table: Optional[str] = None) -> str:
        """Render a prompt snippet describing this data source for an LLM."""
        raise NotImplementedError("prompt is not implemented for this data source.")

    @abstractmethod
    def profile_table(self, table_name: str, **kwargs: Dict[str, Any]) -> dict:
        """
        Profile a table and return its metadata.

        Args:
            table_name (str): The name of the table to profile.
            **kwargs: Additional parameters for profiling.

        Returns:
            dict: A dictionary containing the table's metadata.
        """
        raise NotImplementedError(
            "profile_table is not implemented for this data source."
        )
/docs/source/reference/api/index.md: -------------------------------------------------------------------------------- 1 | # 🔧 API Reference 2 | 3 | Complete API documentation for Ryoma's core components and interfaces. 4 | 5 | ## 🎯 Core APIs 6 | 7 | | 🧩 Component | 📝 Description | 🔗 Link | 8 | |--------------|----------------|---------| 9 | | **Core API** | Base agents, stores, and configuration | [Core API →](core.md) | 10 | 11 | ## 🏗️ Architecture APIs 12 | 13 | The API reference covers Ryoma's unified three-tier architecture: 14 | 15 | ### 1. **Agent Layer** 16 | - Base agent functionality and common interfaces 17 | - Agent factory and creation patterns 18 | - Multi-agent routing and coordination 19 | 20 | ### 2. **Store Layer** 21 | - Metadata store management 22 | - Vector store operations 23 | - Unified store coordination 24 | 25 | ### 3. **Data Layer** 26 | - Data source connections and management 27 | - Catalog indexing and search 28 | - Query execution and validation 29 | 30 | ## 🚀 Quick Examples 31 | 32 | ### Agent Creation 33 | ```python 34 | from ryoma_ai.agent.factory import AgentFactory 35 | 36 | # Create any agent type 37 | agent = AgentFactory.create_agent( 38 | agent_type="sql", 39 | model="gpt-4o", 40 | datasource=datasource, 41 | store=meta_store, 42 | vector_store=vector_store 43 | ) 44 | ``` 45 | 46 | ### Store Management 47 | ```python 48 | from ryoma_ai.store.store_factory import StoreFactory 49 | 50 | # Create unified stores 51 | meta_store = StoreFactory.create_store( 52 | store_type="postgres", 53 | connection_string="postgresql://localhost:5432/metadata" 54 | ) 55 | ``` 56 | 57 | ### Catalog Operations 58 | ```python 59 | # Search and index operations 60 | agent.index_datasource(datasource, level="table") 61 | catalog = agent.search_catalogs("customer data", top_k=10) 62 | ``` 63 | 64 | ```{toctree} 65 | :maxdepth: 2 66 | 67 | core 68 | ``` -------------------------------------------------------------------------------- 
def download_model(model_name="Llama-3.2-1B-Instruct-Q4_0.gguf"):
    """Download a GPT4All model."""
    try:
        from gpt4all import GPT4All

        print(f"Downloading GPT4All model: {model_name}")
        print("This may take a few minutes depending on your internet connection...")

        # Skip the download when the model file is already cached locally.
        target_path = Path.home() / ".cache" / "gpt4all" / model_name
        if target_path.exists():
            print(f"Model {model_name} already exists at {target_path}")
            return True

        # Instantiating GPT4All triggers the actual download.
        llm = GPT4All(model_name)

        print(f"Model {model_name} downloaded successfully!")
        print(f"Model path: {target_path}")

        # Smoke-test the freshly downloaded model.
        print("Testing model...")
        reply = llm.generate("Hello, how are you?", max_tokens=50)
        print(f"Test response: {reply}")

        return True

    except ImportError:
        print("GPT4All not available. Please install with: pip install gpt4all")
        return False
    except Exception as e:
        print(f"Error downloading model: {e}")
        return False
class FileConfig:
    """Placeholder for file data source configuration options."""

    pass


class FileDataSource(DataSource):
    """Data source that reads a single local file (csv, parquet, or json)."""

    type: str = "file"
    file_path: str
    file_format: str
    file_name: str

    # NOTE(review): the DataSource base declares abstract methods
    # (crawl_catalog, prompt, profile_table) that are not implemented here —
    # confirm whether instantiation is expected to work or stubs are needed.

    def __init__(
        self,
        file_path: str,
        file_format: str,
        file_name: Optional[str] = None,
        **kwargs,
    ):
        """Create a file-backed data source.

        Args:
            file_path: Path to the file on disk.
            file_format: One of ``"csv"``, ``"parquet"``, ``"json"``.
            file_name: Optional display name; defaults to ``file_path``.
            **kwargs: Ignored, kept for signature compatibility.
        """
        # BUG FIX: the previous implementation assigned the attributes
        # *before* calling ``super().__init__`` — invalid on a pydantic model,
        # which rejects attribute assignment prior to initialization — and
        # never passed the declared fields to the base constructor, so
        # validation of ``file_path``/``file_format``/``file_name`` failed.
        super().__init__(
            name=file_name or file_path,
            type="file",
            file_path=file_path,
            file_format=file_format,
            file_name=file_name or file_path,
        )

    def get_catalog(self, **kwargs) -> Table:
        """Build a single-table catalog entry from the file's Arrow schema."""
        table_schema = self.to_arrow(**kwargs).schema
        return Table(
            table_name=self.file_name,
            table_columns=[
                # BUG FIX: ``str(schema.field(name))`` renders "name: type";
                # only the column *type* belongs in ``column_type``.
                {
                    "column_name": name,
                    "column_type": str(table_schema.field(name).type),
                }
                for name in table_schema.names
            ],
        )

    def to_arrow(self, **kwargs) -> pa.Table:
        """Load the file into a ``pyarrow.Table``.

        Raises:
            NotImplementedError: If ``file_format`` is not supported.
        """
        if self.file_format == "csv":
            from pyarrow.csv import read_csv

            return read_csv(self.file_path, **kwargs)
        elif self.file_format == "parquet":
            from pyarrow.parquet import read_table

            return read_table(self.file_path, **kwargs)
        elif self.file_format == "json":
            from pyarrow.json import read_json

            return read_json(self.file_path, **kwargs)
        else:
            raise NotImplementedError(f"FileFormat is unsupported: {self.file_format}")

    def to_pandas(self, **kwargs):
        """Load the file as a pandas DataFrame.

        BUG FIX: forwards ``**kwargs`` to :meth:`to_arrow` (previously the
        keyword arguments were silently ignored).
        """
        return self.to_arrow(**kwargs).to_pandas()
def modal() -> rx.Component:
    """A modal to create a new workspace."""
    # Header: title plus a close icon that toggles the modal state.
    close_icon = rc.icon(
        tag="close",
        font_size="sm",
        on_click=ChatState.toggle_modal,
        color="#fff8",
        _hover={"color": "#fff"},
        cursor="pointer",
    )
    header = rc.modal_header(
        rc.hstack(
            rc.text("Create new workspace"),
            close_icon,
            align_items="center",
            justify_content="space-between",
        )
    )
    # Body: input field capturing the new workspace name on blur.
    body = rc.modal_body(
        rc.input(
            placeholder="Type something...",
            on_blur=ChatState.set_new_chat_name,
            bg="#222",
            border_color="#fff3",
            _placeholder={"color": "#fffa"},
        ),
    )
    # Footer: the create action button.
    footer = rc.modal_footer(
        rc.button(
            "Create",
            bg="#5535d4",
            box_shadow="md",
            px="4",
            py="2",
            h="auto",
            _hover={"bg": "#4c2db3"},
            on_click=ChatState.create_chat,
        ),
    )
    return rc.modal(
        rc.modal_overlay(
            rc.modal_content(
                header,
                body,
                footer,
                bg="#222",
                color="#fff",
            ),
        ),
        is_open=ChatState.modal_open,
    )
class PandasAgent(WorkflowAgent):
    """Workflow agent that interacts with pandas DataFrames via a PandasTool."""

    description: str = (
        "A pandas agent that can use pandas tools to interact with pandas DataFrames."
    )

    def __init__(self, model: str, model_parameters: Optional[Dict] = None):
        """Create the agent with a PandasTool bound to the given model.

        Args:
            model: Model identifier passed to the underlying WorkflowAgent.
            model_parameters: Optional model configuration parameters.
        """
        super().__init__(
            [
                PandasTool(),
            ],
            model,
            model_parameters,
        )

    def add_dataframe(
        self,
        dataframe: DataFrame,
        df_id: Optional[str] = None,
    ) -> "PandasAgent":
        """
        Register a DataFrame as a resource, update the prompt context and tool script context.

        Args:
            dataframe: The pandas DataFrame to register.
            df_id: Optional custom name for the DataFrame.

        Returns:
            self (for chaining)
        """
        # Register DataFrame in the agent's registry
        obj_id = self.register_resource(dataframe)
        df_name = df_id or f"df_{obj_id}"

        # Add prompt context (note: dataframe.info() prints, we capture as string)
        buffer = io.StringIO()
        dataframe.info(buf=buffer)
        metadata_str = buffer.getvalue()

        self.add_prompt(
            f"""
        dataframe name: {df_name}
        dataframe metadata:\n{metadata_str}
        """
        )

        # Inject into PythonTool script context
        for tool in self.tools:
            if isinstance(tool, PythonTool):
                tool.update_script_context(script_context={df_name: dataframe})

        return self
def get_postgres_datasource() -> PostgresDataSource:
    """Build a PostgresDataSource from POSTGRES_* environment variables."""
    return PostgresDataSource(
        host=os.getenv("POSTGRES_HOST", "localhost"),
        # BUG FIX: os.getenv returns a *string* when the variable is set,
        # while the fallback was the int 5432 — coerce so the port type is
        # consistent regardless of the environment.
        port=int(os.getenv("POSTGRES_PORT", 5432)),
        database=os.getenv("POSTGRES_DB", "postgres"),
        user=os.getenv("POSTGRES_USER"),
        password=os.getenv("POSTGRES_PASSWORD"),
        db_schema=os.getenv("POSTGRES_SCHEMA", "public"),
    )


postgres_db = get_postgres_datasource()


def run_pandas() -> None:
    """Demo: ask a PandasAgent questions about an in-memory DataFrame."""
    pandas_agent = PandasAgent("gpt-3.5-turbo")
    df = pd.DataFrame(
        {
            "artist": ["Artist A", "Artist B", "Artist C", "Artist A", "Artist B"],
            "album": ["Album 1", "Album 2", "Album 3", "Album 4", "Album 5"],
        }
    )
    pandas_agent.add_dataframe(df)
    # BUG FIX: corrected the "artits" typo in the question sent to the model.
    pandas_agent.invoke("show me the artists with the most albums in descending order")
    pandas_agent.invoke(tool_mode=ToolMode.ONCE)


def run_sql_rag() -> None:
    """Demo: SQL agent with catalog indexing and vector-store retrieval."""
    sql_agent = SqlAgent(
        "gpt-3.5-turbo",
        embedding={"model": "text-embedding-3-small"},
        vector_store={
            "type": "pgvector",
        },
    )
    sql_agent.index_datasource(postgres_db, level="table")
    catalog = sql_agent.search_catalogs(
        "I want to get the top 10 artists with the most albums in descending order",
        top_k=3,
    )
    sql_agent.add_prompt(catalog.prompt)
    sql_agent.invoke("show me the tables in the database")


def run_sql() -> None:
    """Demo: plain SQL agent with the datasource prompt inlined."""
    sql_agent = SqlAgent("gpt-3.5-turbo")
    sql_agent.add_prompt(postgres_db.prompt())
    sql_agent.invoke("show me the tables in the database")


run_sql()
class StoreException(Exception):
    """Base exception for all store-related errors."""

    def __init__(self, message: str, cause: Optional[Exception] = None):
        # Keep the triggering exception around so callers can inspect the
        # root cause without relying on __context__.
        super().__init__(message)
        self.cause = cause


class DataSourceNotFoundError(StoreException):
    """Raised when a requested data source is not found in the store."""

    def __init__(self, data_source_id: str, cause: Optional[Exception] = None):
        super().__init__(f"Data source with ID '{data_source_id}' not found", cause)
        self.data_source_id = data_source_id


class CatalogNotFoundError(StoreException):
    """Raised when a requested catalog is not found in the store."""

    def __init__(
        self,
        catalog_id: str,
        data_source_id: Optional[str] = None,
        cause: Optional[Exception] = None,
    ):
        # Mention the owning data source only when one was supplied.
        scope = f" for data source '{data_source_id}'" if data_source_id else ""
        super().__init__(f"Catalog '{catalog_id}' not found{scope}", cause)
        self.catalog_id = catalog_id
        self.data_source_id = data_source_id


class DataSourceConnectionError(StoreException):
    """Raised when data source connection fails."""

    def __init__(
        self,
        data_source_id: str,
        connection_error: str,
        cause: Optional[Exception] = None,
    ):
        super().__init__(
            f"Failed to connect to data source '{data_source_id}': {connection_error}",
            cause,
        )
        self.data_source_id = data_source_id
        self.connection_error = connection_error
def upgrade() -> None:
    """Apply this revision: create ``documentproject`` and drop ``vectorstore``."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table(
        "documentproject",
        sa.Column("id", sa.Integer(), nullable=False),
        sa.Column("project_name", sqlmodel.sql.sqltypes.AutoString(), nullable=False),
        sa.Column("description", sqlmodel.sql.sqltypes.AutoString(), nullable=True),
        sa.Column("document_count", sa.Integer(), nullable=False),
        sa.Column("created_at", sa.DateTime(), nullable=True),
        sa.Column("updated_at", sa.DateTime(), nullable=True),
        sa.Column("is_active", sa.Boolean(), nullable=False),
        sa.PrimaryKeyConstraint("id"),
    )
    op.drop_table("vectorstore")
    # ### end Alembic commands ###


def downgrade() -> None:
    """Revert this revision: recreate ``vectorstore`` and drop ``documentproject``.

    NOTE(review): the recreated ``vectorstore`` uses generic VARCHAR columns
    from the autogenerated diff — confirm it matches the original table
    definition before relying on a downgrade in production.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    op.create_table(
        "vectorstore",
        sa.Column("id", sa.INTEGER(), nullable=False),
        sa.Column("project_name", sa.VARCHAR(), nullable=False),
        sa.Column("online_store", sa.VARCHAR(), nullable=False),
        sa.Column("online_store_configs", sa.VARCHAR(), nullable=True),
        sa.Column("offline_store", sa.VARCHAR(), nullable=False),
        sa.Column("offline_store_configs", sa.VARCHAR(), nullable=True),
        sa.PrimaryKeyConstraint("id"),
    )
    op.drop_table("documentproject")
    # ### end Alembic commands ###
10 | 11 | 12 | ## backlogs 13 | 1. the impl should be able to index and search the data sources / catalog in the store, so that the agent won't need to load all the datasources / catalog every time. 14 | 2. update the documentation to reflect the changes made in the system. 15 | 3. optimize and add more tests. 16 | 4. fix the mypy for the entire project ryoma_ai 17 | 5. fix the mypy for the entire project ryoma_lab 18 | 19 | ## Important 20 | 1. For any code, no fallback silently. 21 | 2. If any exception happens, raise it specific Exception. if Exception doesn't exsits, create one. 22 | 2. Always try to implement in OOP way, which means more module, and more class so that future extension is easier. 23 | 3. try to model data and logic separately. Try to avoid directly using dict or list to hold data. 24 | 4. Always add type hints for functions and methods. 25 | 5. Always add docstrings for all classes, functions and methods. 26 | 6. Always add relevant tests for new features and logic. -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/prompt/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Ryoma AI Prompt System 3 | 4 | A modern, modular prompt management system for AI applications. 5 | 6 | Key Components: 7 | - PromptManager: High-level interface for prompt creation 8 | - PromptType, ExampleFormat, SelectorStrategy: Configuration enums 9 | - prompt_registry: Global registry for custom components 10 | 11 | Basic Usage: 12 | from ryoma_ai.prompt import prompt_manager, PromptType 13 | 14 | # Create a SQL prompt 15 | prompt = prompt_manager.create_sql_prompt( 16 | schema="CREATE TABLE users (id INT, name VARCHAR(100))", 17 | question="What are all the user names?" 
18 | ) 19 | 20 | # Create a chat prompt 21 | prompt = prompt_manager.create_chat_prompt( 22 | user_input="Hello, how can you help me?", 23 | context="You are a data analyst assistant" 24 | ) 25 | 26 | Advanced Usage: 27 | # Register custom templates 28 | prompt_manager.register_template( 29 | name="analysis_template", 30 | prompt_type=PromptType.INSTRUCTION_FOLLOWING, 31 | template_string="Analyze this data: {data}", 32 | description="Template for data analysis tasks" 33 | ) 34 | """ 35 | 36 | # Backward compatibility 37 | from ryoma_ai.prompt.base import BasePromptTemplate, BasicContextPromptTemplate 38 | 39 | # Base classes for extensions 40 | # Core components 41 | from ryoma_ai.prompt.core import ( 42 | ExampleFormat, 43 | ExampleFormatter, 44 | ExampleSelector, 45 | PromptBuilder, 46 | PromptConfig, 47 | PromptTemplate, 48 | PromptType, 49 | SelectorStrategy, 50 | prompt_registry, 51 | ) 52 | 53 | # Main interface 54 | from ryoma_ai.prompt.manager import prompt_manager 55 | 56 | __all__ = [ 57 | # Main interface 58 | "prompt_manager", 59 | # Configuration enums 60 | "PromptType", 61 | "ExampleFormat", 62 | "SelectorStrategy", 63 | # Core classes 64 | "PromptConfig", 65 | "PromptTemplate", 66 | "prompt_registry", 67 | # Extension points 68 | "PromptBuilder", 69 | "ExampleSelector", 70 | "ExampleFormatter", 71 | # Backward compatibility 72 | "BasePromptTemplate", 73 | "BasicContextPromptTemplate", 74 | ] 75 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/components/model_selector.py: -------------------------------------------------------------------------------- 1 | import reflex as rx 2 | from ryoma_lab.models.llm import ChatModelProvider, EmbeddingModelProvider 3 | 4 | 5 | def model_selector( 6 | model_provider, 7 | model_value, 8 | on_model_value_change, 9 | trigger_width: str = "12em", 10 | ) -> rx.Component: 11 | return rx.select.root( 12 | rx.select.trigger( 13 | placeholder="Select a 
def embedding_model_selector(
    model_value,
    on_model_value_change,
    trigger_width: str = "12em",
) -> rx.Component:
    """Render a dropdown for choosing an embedding model.

    Args:
        model_value: Currently selected model value.
        on_model_value_change: Handler invoked when the selection changes.
        trigger_width: CSS width of the select trigger.

    Returns:
        The embedding model selector component.
    """
    return model_selector(
        EmbeddingModelProvider,
        model_value,
        on_model_value_change,
        trigger_width,
    )


def chat_model_selector(
    model_value,
    on_model_value_change,
    trigger_width: str = "12em",
) -> rx.Component:
    """Render a dropdown for choosing a chat model.

    Args:
        model_value: Currently selected model value.
        on_model_value_change: Handler invoked when the selection changes.
        trigger_width: CSS width of the select trigger.

    Returns:
        The chat model selector component.
    """
    return model_selector(
        ChatModelProvider,
        model_value,
        on_model_value_change,
        trigger_width,
    )
Table Grouping] 17 | B --> B2[📏 Schema Size
Analysis] 18 | B --> B3[🎯 Representative
Selection] 19 | 20 | %% Format Restriction Details 21 | C --> C1[🔍 Format
Analysis] 22 | C --> C2[📋 Column
Specification] 23 | C --> C3[💡 Example
Generation] 24 | 25 | %% Column Exploration Details 26 | D --> D1[🔧 Exploration Query
Generation] 27 | D --> D2[▶️ Query
Execution] 28 | D --> D3[💎 Insight
Extraction] 29 | 30 | %% Parallel Generation Details 31 | E --> E1[🔀 Context
Variation] 32 | E --> E2[⚡ Parallel SQL
Generation] 33 | E --> E3[📦 Candidate
Collection] 34 | 35 | %% Self-Refinement Details 36 | F --> F1[🔍 Self-Consistency
Check] 37 | F --> F2[🛠️ Error
Correction] 38 | F --> F3[✨ Query
Refinement] 39 | 40 | %% Consensus Voting Details 41 | G --> G1[▶️ Result
Execution] 42 | G --> G2[⚖️ Result
Comparison] 43 | G --> G3[🗳️ Majority
Vote] 44 | 45 | %% Final Validation Details 46 | H --> H1[📊 Confidence
Scoring] 47 | H --> H2[✅ Final
Validation] 48 | H --> H3[📝 Response
Formatting] 49 | 50 | %% Styling 51 | classDef startEnd fill:#e3f2fd,stroke:#1976d2,stroke-width:3px,color:#000 52 | classDef mainProcess fill:#f3e5f5,stroke:#7b1fa2,stroke-width:2px,color:#000 53 | classDef subProcess fill:#e8f5e8,stroke:#388e3c,stroke-width:1px,color:#000 54 | classDef innovation fill:#fff8e1,stroke:#f57c00,stroke-width:2px,color:#000 55 | 56 | class A,I startEnd 57 | class B,C,D,E,F,G,H mainProcess 58 | class B1,B2,B3,C1,C2,C3,D1,D2,D3,E1,E2,E3,F1,F2,F3,G1,G2,G3,H1,H2,H3 subProcess 59 | -------------------------------------------------------------------------------- /.github/workflows/main_ryoma-demo.yml: -------------------------------------------------------------------------------- 1 | # Docs for the Azure Web Apps Deploy action: https://github.com/Azure/webapps-deploy 2 | # More GitHub Actions for Azure: https://github.com/Azure/actions 3 | # More info on Python, GitHub Actions, and Azure App Service: https://aka.ms/python-webapps-actions 4 | 5 | name: Build and deploy ryoma_ai api 6 | 7 | on: 8 | push: 9 | branches: 10 | - main 11 | paths: 12 | - 'Dockerfile' 13 | - 'ryoma_ai/**' 14 | - 'ryoma_ai-lab/**' 15 | - '.github/workflows/main_ryoma-demo.yml' 16 | workflow_dispatch: 17 | 18 | env: 19 | AZURE_WEBAPP_PACKAGE_PATH: "./" 20 | COMMIT_SHA: ${{ github.sha }} 21 | 22 | jobs: 23 | build: 24 | runs-on: ubuntu-latest 25 | environment: production 26 | 27 | defaults: 28 | run: 29 | working-directory: ${{ env.AZURE_WEBAPP_PACKAGE_PATH }} 30 | 31 | steps: 32 | - uses: actions/checkout@v4 33 | 34 | - name: Login to Azure Container Registry 35 | uses: docker/login-action@v3 36 | with: 37 | registry: ryoma.azurecr.io 38 | username: ryoma 39 | password: ${{ secrets.AZURE_CR_PASSWORD }} 40 | 41 | - name: docker build image 42 | run: | 43 | docker build --platform linux/amd64 ./ -t ryoma-api --build-arg APP_ENV=production 44 | 45 | - name: Set up Docker Build 46 | run: | 47 | docker tag ryoma-api ryoma.azurecr.io/ryoma-dataapp:${{ env.COMMIT_SHA }} 48 | 
49 | - name: Publish to Azure Container Registry 50 | run: | 51 | docker push ryoma.azurecr.io/ryoma-dataapp:${{ env.COMMIT_SHA }} 52 | 53 | # deploy: 54 | # runs-on: ubuntu-latest 55 | # needs: build 56 | # environment: production 57 | # 58 | # steps: 59 | # - name: Log in to Azure 60 | # uses: azure/login@v1 61 | # with: 62 | # creds: ${{ secrets.AZURE_CREDENTIALS }} 63 | # 64 | # - name: deploy Container App 65 | # uses: azure/container-apps-deploy-action@v2 66 | # with: 67 | # acrName: ryomaregistry 68 | # containerAppName: ryoma_ai-dataapp 69 | # resourceGroup: ryoma_ai 70 | # imageToDeploy: ryomaregistry.azurecr.io/ryoma_ai-dataapp:${{ env.COMMIT_SHA }} 71 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/pages/settings.py: -------------------------------------------------------------------------------- 1 | """The settings page.""" 2 | 3 | import reflex as rx 4 | from ryoma_lab.templates import ThemeState, template 5 | 6 | 7 | @template(route="/settings", title="Settings") 8 | def settings() -> rx.Component: 9 | """The settings page. 10 | 11 | Returns: 12 | The UI for the settings page. 
13 | """ 14 | return rx.vstack( 15 | rx.heading("Settings", size="8"), 16 | rx.hstack( 17 | rx.text("Dark mode: "), 18 | rx.color_mode.switch(), 19 | ), 20 | rx.hstack( 21 | rx.text("Primary color: "), 22 | rx.select( 23 | [ 24 | "tomato", 25 | "red", 26 | "ruby", 27 | "crimson", 28 | "pink", 29 | "plum", 30 | "purple", 31 | "violet", 32 | "iris", 33 | "indigo", 34 | "blue", 35 | "cyan", 36 | "teal", 37 | "jade", 38 | "green", 39 | "grass", 40 | "brown", 41 | "orange", 42 | "sky", 43 | "mint", 44 | "lime", 45 | "yellow", 46 | "amber", 47 | "gold", 48 | "bronze", 49 | "gray", 50 | ], 51 | value=ThemeState.accent_color, 52 | on_change=ThemeState.set_accent_color, 53 | ), 54 | ), 55 | rx.hstack( 56 | rx.text("Secondary color: "), 57 | rx.select( 58 | [ 59 | "gray", 60 | "mauve", 61 | "slate", 62 | "sage", 63 | "olive", 64 | "sand", 65 | ], 66 | value=ThemeState.gray_color, 67 | on_change=ThemeState.set_gray_color, 68 | ), 69 | ), 70 | rx.text( 71 | "You can edit this page in ", 72 | rx.code("{your_app}/pages/settings.py"), 73 | size="1", 74 | ), 75 | ) 76 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/states/base.py: -------------------------------------------------------------------------------- 1 | """Base state for the app.""" 2 | 3 | from typing import List, Optional 4 | 5 | import reflex as rx 6 | from fastapi_users.db import ( 7 | SQLAlchemyBaseOAuthAccountTableUUID, 8 | SQLAlchemyBaseUserTableUUID, 9 | ) 10 | from sqlalchemy import JSON, Column 11 | from sqlmodel import Field, Relationship, select 12 | 13 | 14 | class OAuthAccount(SQLAlchemyBaseOAuthAccountTableUUID, rx.Model, table=True): 15 | id: Optional[str] = Field(default=None, primary_key=True) 16 | 17 | user_id: Optional[str] = Field(default=None, foreign_key="user.id") 18 | user: "User" = Relationship(back_populates="oauth_accounts") 19 | 20 | 21 | class User(SQLAlchemyBaseUserTableUUID, rx.Model, table=True): 22 | id: Optional[str] = 
Field(default=None, primary_key=True) 23 | anonymous: bool = Field(default=True) 24 | username: str = Field(nullable=False, unique=True) 25 | name: str = Field(default="") 26 | display_name: str = Field(default="") 27 | initials: str = Field(nullable=True) 28 | color: str = Field(nullable=True) 29 | avatar_url: str = Field(nullable=True) 30 | workspace: str = Field(default="{}", nullable=False) 31 | settings: str = Field(default="{}", nullable=False) 32 | permissions: str = Field(sa_column=Column(JSON), default={}) 33 | oauth_accounts: List[OAuthAccount] = Relationship(back_populates="user") 34 | 35 | 36 | class BaseState(rx.State): 37 | """State for the app.""" 38 | 39 | user: Optional[User] = None 40 | 41 | sidebar_displayed: bool = False 42 | 43 | @rx.var 44 | def origin_url(self) -> str: 45 | """Get the url of the current page. 46 | 47 | Returns: 48 | str: The url of the current page. 49 | """ 50 | return self.router_data.get("asPath", "") 51 | 52 | def toggle_sidebar_displayed(self) -> None: 53 | """Toggle the sidebar_chat_history displayed.""" 54 | self.sidebar_displayed = not self.sidebar_displayed 55 | 56 | def load_user(self) -> None: 57 | """Load the user.""" 58 | with rx.session() as session: 59 | self.user = session.exec( 60 | select(User).where(User.username == "admin") 61 | ).first() 62 | 63 | def on_load(self) -> None: 64 | """Load the state.""" 65 | self.load_user() 66 | -------------------------------------------------------------------------------- /docs/source/architecture/multi-agent-routing-clean.mmd: -------------------------------------------------------------------------------- 1 | flowchart TD 2 | %% User Input 3 | User["User Input
Natural Language Question"] --> Router 4 | 5 | %% Smart Router 6 | Router["LLM-Based Smart Router
Intent Classification & Confidence Scoring"] 7 | 8 | %% Agent Selection 9 | Router --> SQL["SQL Agent
Database Operations"] 10 | Router --> Python["Python Agent
Code Execution"] 11 | Router --> Analysis["Data Analysis Agent
Statistics & Visualization"] 12 | Router --> Chat["Chat Agent
General Q&A"] 13 | 14 | %% Capabilities 15 | SQL --> SQLCap["Natural Language to SQL
Schema Exploration
Data Retrieval & Joins
Approval Workflow"] 16 | 17 | Python --> PyCap["Script Execution
Function Creation
Algorithm Implementation
Testing & Debugging"] 18 | 19 | Analysis --> AnalysisCap["Statistical Analysis
Data Visualization
Trend Analysis
Report Generation"] 20 | 21 | Chat --> ChatCap["Explanations & Help
Best Practices
Conceptual Discussions
Information Retrieval"] 22 | 23 | %% Data Integration 24 | DataStore[("Multi-Database Support
PostgreSQL, MySQL, SQLite
DuckDB, Iceberg")] 25 | 26 | %% Vector Store 27 | VectorStore[("Semantic Search
Catalog Indexing
Optimized Performance")] 28 | 29 | %% Connections 30 | SQL -.->|Query| DataStore 31 | Analysis -.->|Analyze| DataStore 32 | SQL -.->|Search| VectorStore 33 | 34 | %% Output 35 | SQLCap --> Output["Intelligent Response
Context-Aware Results"] 36 | PyCap --> Output 37 | AnalysisCap --> Output 38 | ChatCap --> Output 39 | 40 | Output --> User 41 | 42 | %% Styling 43 | classDef userStyle fill:#e3f2fd,stroke:#1976d2,stroke-width:3px,color:#000 44 | classDef routerStyle fill:#f3e5f5,stroke:#7b1fa2,stroke-width:3px,color:#000 45 | classDef agentStyle fill:#e8f5e8,stroke:#388e3c,stroke-width:3px,color:#000 46 | classDef dataStyle fill:#fff8e1,stroke:#f57c00,stroke-width:2px,color:#000 47 | classDef outputStyle fill:#fce4ec,stroke:#c2185b,stroke-width:3px,color:#000 48 | 49 | class User userStyle 50 | class Router routerStyle 51 | class SQL,Python,Analysis,Chat agentStyle 52 | class DataStore,VectorStore dataStyle 53 | class Output outputStyle -------------------------------------------------------------------------------- /alembic/env.py: -------------------------------------------------------------------------------- 1 | from logging.config import fileConfig 2 | 3 | from sqlalchemy import engine_from_config, pool 4 | 5 | from alembic import context 6 | 7 | # this is the Alembic Config object, which provides 8 | # access to the values within the .ini file in use. 9 | config = context.config 10 | 11 | # Interpret the config file for Python logging. 12 | # This line sets up loggers basically. 13 | if config.config_file_name is not None: 14 | fileConfig(config.config_file_name) 15 | 16 | # add your model's MetaData object here 17 | # for 'autogenerate' support 18 | # from myapp import mymodel 19 | # target_metadata = mymodel.Base.metadata 20 | target_metadata = None 21 | 22 | # other values from the config, defined by the needs of env.py, 23 | # can be acquired: 24 | # my_important_option = config.get_main_option("my_important_option") 25 | # ... etc. 26 | 27 | 28 | def run_migrations_offline() -> None: 29 | """Run migrations in 'offline' mode. 30 | 31 | This configures the context with just a URL 32 | and not an Engine, though an Engine is acceptable 33 | here as well. 
By skipping the Engine creation 34 | we don't even need a DBAPI to be available. 35 | 36 | Calls to context.execute() here emit the given string to the 37 | script output. 38 | 39 | """ 40 | url = config.get_main_option("sqlalchemy.url") 41 | context.configure( 42 | url=url, 43 | target_metadata=target_metadata, 44 | literal_binds=True, 45 | dialect_opts={"paramstyle": "named"}, 46 | ) 47 | 48 | with context.begin_transaction(): 49 | context.run_migrations() 50 | 51 | 52 | def run_migrations_online() -> None: 53 | """Run migrations in 'online' mode. 54 | 55 | In this scenario we need to create an Engine 56 | and associate a connection with the context. 57 | 58 | """ 59 | connectable = engine_from_config( 60 | config.get_section(config.config_ini_section, {}), 61 | prefix="sqlalchemy.", 62 | poolclass=pool.NullPool, 63 | ) 64 | 65 | with connectable.connect() as connection: 66 | context.configure(connection=connection, target_metadata=target_metadata) 67 | 68 | with context.begin_transaction(): 69 | context.run_migrations() 70 | 71 | 72 | if context.is_offline_mode(): 73 | run_migrations_offline() 74 | else: 75 | run_migrations_online() 76 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/models/data_catalog.py: -------------------------------------------------------------------------------- 1 | import uuid 2 | from typing import List, Optional 3 | 4 | import reflex as rx 5 | from sqlmodel import Field, Relationship 6 | 7 | 8 | class CatalogTable(rx.Model, table=True): 9 | """The Catalog Table Model.""" 10 | 11 | __tablename__ = "catalog" 12 | 13 | id: str = Field( 14 | default_factory=lambda: str(uuid.uuid4()), primary_key=True, max_length=36 15 | ) 16 | datasource: Optional[str] = Field(None, description="Name of the datasource") 17 | catalog_name: str = Field( 18 | ..., description="Name of the catalog, also known as the database name" 19 | ) 20 | 21 | schemas: List["SchemaTable"] = 
Relationship(back_populates="catalog") 22 | 23 | 24 | class SchemaTable(rx.Model, table=True): 25 | """The Schema Model.""" 26 | 27 | __tablename__ = "schema" 28 | 29 | id: str = Field( 30 | default_factory=lambda: str(uuid.uuid4()), primary_key=True, max_length=36 31 | ) 32 | schema_name: str 33 | tables: List["TableTable"] = Relationship(back_populates="schema") 34 | 35 | catalog_id: Optional[str] = Field(default=None, foreign_key="catalog.id") 36 | catalog: Optional[CatalogTable] = Relationship(back_populates="schemas") 37 | 38 | 39 | class TableTable(rx.Model, table=True): 40 | """The Table Model.""" 41 | 42 | __tablename__ = "table" 43 | 44 | id: str = Field( 45 | default_factory=lambda: str(uuid.uuid4()), primary_key=True, max_length=36 46 | ) 47 | table_name: str 48 | description: Optional[str] = None 49 | is_view: Optional[bool] = False 50 | attrs: Optional[str] = None 51 | columns: List["ColumnTable"] = Relationship(back_populates="table") 52 | 53 | schema_id: Optional[str] = Field(default=None, foreign_key="schema.id") 54 | schema: Optional[SchemaTable] = Relationship(back_populates="tables") 55 | 56 | 57 | class ColumnTable(rx.Model, table=True): 58 | """The Column Model.""" 59 | 60 | __tablename__ = "column" 61 | 62 | id: str = Field( 63 | default_factory=lambda: str(uuid.uuid4()), primary_key=True, max_length=36 64 | ) 65 | name: str 66 | type: str 67 | description: Optional[str] = None 68 | 69 | table_id: Optional[str] = Field(default=None, foreign_key="table.id") 70 | table: Optional[TableTable] = Relationship(back_populates="columns") 71 | -------------------------------------------------------------------------------- /packages/ryoma_ai/ryoma_ai/agent/spark_agent.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from ryoma_ai.agent.workflow import WorkflowAgent 3 | from ryoma_ai.tool.python_tool import PythonTool 4 | from ryoma_ai.tool.spark_tool import ConvertPandasToSparkTool, 
SparkTool 5 | 6 | 7 | class SparkAgent(WorkflowAgent): 8 | description: str = ( 9 | "A PySpark agent that can use PySpark tools to run PySpark scripts." 10 | ) 11 | 12 | def __init__( 13 | self, spark_configs: dict[str, str], model: str, model_parameters=None 14 | ): 15 | self.spark_session = None 16 | self.init_session(spark_configs) 17 | super().__init__( 18 | [ 19 | SparkTool(), 20 | ConvertPandasToSparkTool(), 21 | ], 22 | model, 23 | model_parameters, 24 | ) 25 | for tool in self.tools: 26 | if isinstance(tool, PythonTool): 27 | tool.update_script_context( 28 | script_context={"spark_session": self.spark_session} 29 | ) 30 | 31 | def init_session(self, spark_configs: dict[str, str]): 32 | self.spark_session = self.create_spark_session(spark_configs) 33 | self.spark_session.conf.set("spark.sql.execution.arrow.enabled", "true") 34 | 35 | @staticmethod 36 | def create_spark_session(spark_configs: dict[str, str]): 37 | assert "master" in spark_configs, "master is required in spark_configs" 38 | assert "app_name" in spark_configs, "app_name is required in spark_configs" 39 | 40 | # TODO refactor to use ibis spark backend 41 | import findspark 42 | from pyspark.sql import SparkSession 43 | 44 | findspark.init() 45 | 46 | return ( 47 | SparkSession.builder.master(spark_configs.get("master")) 48 | .appName(spark_configs.get("app_name")) 49 | .getOrCreate() 50 | ) 51 | 52 | def add_pandas_dataframe(self, dataframe: pd.DataFrame): 53 | df_id = f"df_{id(dataframe)}" 54 | self.add_prompt( 55 | f""" 56 | dataframe name: {df_id} 57 | dataframe metadata: {dataframe.info} 58 | """ 59 | ) 60 | for tool in self.tools: 61 | if isinstance(tool, PythonTool): 62 | tool.update_script_context(script_context={df_id: dataframe}) 63 | return self 64 | -------------------------------------------------------------------------------- /tests/unit_tests/test_tool.py: -------------------------------------------------------------------------------- 1 | from unittest.mock import Mock, patch 2 | 
3 | import pandas as pd 4 | import pytest 5 | from pyspark.sql import SparkSession 6 | from ryoma_ai.models.sql import QueryStatus 7 | from ryoma_ai.tool.pandas_tool import PandasTool 8 | from ryoma_ai.tool.spark_tool import SparkTool 9 | from ryoma_ai.tool.sql_tool import SqlQueryTool 10 | 11 | from tests.unit_tests.test_datasource import MockSqlDataSource 12 | 13 | 14 | @pytest.fixture 15 | def pandas_dataframe(): 16 | df = pd.DataFrame( 17 | { 18 | "year": [2020, 2022, 2019, 2021], 19 | "n_legs": [2, 4, 5, 100], 20 | "animals": ["Flamingo", "Horse", "Brittle stars", "Centipede"], 21 | } 22 | ) 23 | return df 24 | 25 | 26 | @pytest.fixture 27 | def mock_sql_data_source(): 28 | data_source = MockSqlDataSource() 29 | return data_source 30 | 31 | 32 | @pytest.fixture 33 | def pyspark_session(): 34 | return SparkSession.builder.appName("pytest").getOrCreate() 35 | 36 | 37 | def test_pyspark_tool(pyspark_session, pandas_dataframe): 38 | pyspark_tool = SparkTool() 39 | pyspark_tool.update_script_context( 40 | {"spark_session": pyspark_session, "df": pandas_dataframe} 41 | ) 42 | script = """ 43 | spark_session.createDataFrame(df).show() 44 | """ 45 | result = pyspark_tool._run(script) 46 | assert result.success is True 47 | 48 | 49 | def test_sql_tool(mock_sql_data_source): 50 | with patch("ryoma_ai.datasource.sql.SqlDataSource.query") as mock_execute: 51 | mock_execute.return_value = "success" 52 | 53 | # Mock the store to return the datasource 54 | mock_store = Mock() 55 | mock_store.get.return_value = Mock(value=mock_sql_data_source) 56 | 57 | sql_tool = SqlQueryTool() 58 | query = "SELECT * FROM customers LIMIT 4" 59 | result = sql_tool._run(query, mock_store) 60 | assert result.data == "success" 61 | assert result.status == QueryStatus.SUCCESS 62 | 63 | 64 | def test_pandas_tool(pandas_dataframe): 65 | pandas_tool = PandasTool() 66 | pandas_tool.update_script_context({"df": pandas_dataframe}) 67 | script = """ 68 | df["year"] = df["year"] + 1 69 | df 70 | """ 71 | 
result = pandas_tool._run(script) 72 | assert result.success is True 73 | assert result.result["year"].tolist() == [2021, 2023, 2020, 2022] 74 | -------------------------------------------------------------------------------- /packages/ryoma_ai/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling>=1.4.0"] 3 | build-backend = "hatchling.build" 4 | 5 | [project] 6 | name = "ryoma_ai" 7 | version = "0.1.5" 8 | description = "AI Powered Data Platform" 9 | readme = "README.md" 10 | repository = "https://github.com/project-ryoma/ryoma" 11 | homepage = "https://www.ryoma-ai.com" 12 | requires-python = ">=3.9" 13 | 14 | keywords = ["Artificial Intelligence", "Data Analysis", "Code Generation"] 15 | 16 | classifiers = [ 17 | "Development Status :: 3 - Alpha", 18 | "Intended Audience :: Developers", 19 | "Operating System :: OS Independent", 20 | "Topic :: Software Development :: Libraries :: Python Modules", 21 | "License :: OSI Approved :: Apache Software License", 22 | "Programming Language :: Python :: 3", 23 | "Programming Language :: Python :: 3.9", 24 | "Programming Language :: Python :: 3.10", 25 | ] 26 | 27 | dependencies = [ 28 | "mock>=5.1.0", 29 | "openai>=1.33.0", 30 | "pydantic>=2.7.1", 31 | "pandas>=2.2.2", 32 | "langchain-openai>=0.1.7", 33 | "langchain>=0.2.9, <0.3.0", 34 | "langgraph>=0.2.0", 35 | "ipython>=8.14.0", 36 | "pyarrow>=16.1.0", 37 | "typer>=0.12.3", 38 | "types-setuptools>=70.0.0.20240524", 39 | "datasketch>=1.6.5", 40 | "httpx==0.27.2", 41 | "click>=8.0.0", 42 | "rich>=13.0.0", 43 | "prompt-toolkit>=3.0.0", 44 | "amundsen-databuilder>=7.5.0", 45 | "ibis-framework>=9.0.0", 46 | ] 47 | 48 | [project.scripts] 49 | ryoma_ai = "ryoma_ai.cli.main:main" 50 | 51 | [project.optional-dependencies] 52 | snowflake = [ 53 | "ibis-framework[snowflake]>=9.0.0", 54 | "amundsen-databuilder[snowflake]>=7.5.0", 55 | ] 56 | sqlite = [ 57 | "ibis-framework[sqlite]>=9.0.0", 58 
| ] 59 | postgres = [ 60 | "psycopg2>=2.9.2", 61 | "ibis-framework[postgres]>=9.0.0", 62 | ] 63 | mysql = [ 64 | "ibis-framework[mysql]>=9.0.0", 65 | "amundsen-databuilder[rds]>=7.5.0", 66 | ] 67 | bigquery = [ 68 | "ibis-framework[bigquery]>=9.0.0", 69 | "amundsen-databuilder[bigquery]>=7.5.0", 70 | ] 71 | pyspark = [ 72 | "ibis-framework[pyspark]>=9.0.0", 73 | "pyspark>=3.2.0", 74 | "findspark>=1.4.2", 75 | ] 76 | duckdb = [ 77 | "duckdb>=1.0.0", 78 | "ibis-framework[duckdb]>=9.0.0" 79 | ] 80 | dynamodb = [ 81 | "boto3>=1.28.0", 82 | "aioboto3>=11.0.0" 83 | ] 84 | iceberg = [ 85 | "pyiceberg>=0.5.0", 86 | "pyarrow>=14.0.0" 87 | ] 88 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/components/react_rnd.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict, Optional 2 | 3 | import reflex as rx 4 | 5 | 6 | class DraggableData(rx.Base): 7 | x: int 8 | y: int 9 | deltaX: int 10 | deltaY: int 11 | lastX: int 12 | lastY: int 13 | 14 | 15 | class RnD(rx.Component): 16 | library = "react-rnd" 17 | tag = "Rnd" 18 | 19 | # Props 20 | default: rx.Var[Dict[str, Any]] 21 | position: rx.Var[Dict[str, int]] 22 | size: rx.Var[Dict[str, int]] 23 | bounds: rx.Var[str] 24 | min_width: rx.Var[int] 25 | min_height: rx.Var[int] 26 | max_width: rx.Var[int] 27 | max_height: rx.Var[int] 28 | drag_grid: rx.Var[tuple[int, int]] 29 | resize_grid: rx.Var[tuple[int, int]] 30 | lockAspectRatio: rx.Var[bool] 31 | enable_user_select_hack: rx.Var[bool] 32 | disable_dragging: rx.Var[bool] 33 | enable: rx.Var[Dict[str, bool]] 34 | 35 | # Event handlers 36 | on_drag_start: rx.Var[Optional[rx.EventHandler]] 37 | on_drag: rx.Var[Optional[rx.EventHandler]] 38 | on_drag_stop: rx.Var[Optional[rx.EventHandler]] 39 | on_resize_start: rx.Var[Optional[rx.EventHandler]] 40 | on_resize: rx.Var[Optional[rx.EventHandler]] 41 | on_resize_stop: rx.Var[Optional[rx.EventHandler]] 42 | 43 | 
def get_event_triggers(self) -> Dict[str, Any]: 44 | """Get event triggers.""" 45 | 46 | def drag_signature(e0, data: DraggableData): 47 | """Get the drag signature.""" 48 | return [ 49 | data.x, 50 | data.y, 51 | data.deltaX, 52 | data.deltaY, 53 | data.lastX, 54 | data.lastY, 55 | ] 56 | 57 | def resize_signature(e0, direction, ref, delta, position): 58 | """Get the resize signature.""" 59 | return [ 60 | direction, 61 | delta.width, 62 | delta.height, 63 | position.x, 64 | position.y, 65 | ] 66 | 67 | return { 68 | **super().get_event_triggers(), 69 | "on_drag_start": drag_signature, 70 | "on_drag": drag_signature, 71 | "on_drag_stop": drag_signature, 72 | "on_resize_start": resize_signature, 73 | "on_resize": resize_signature, 74 | "on_resize_stop": resize_signature, 75 | } 76 | 77 | 78 | rnd = RnD.create 79 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["hatchling>=1.4.0"] 3 | build-backend = "hatchling.build" 4 | 5 | [tool.hatch.build.targets.wheel] 6 | packages = ["src/foo"] 7 | 8 | 9 | [project] 10 | name = "ryoma-dev" 11 | version = "0.0.1" 12 | description = "Root project for Ryoma AI Platform" 13 | license = {file = "LICENSE"} 14 | readme = "README.md" 15 | requires-python = ">=3.10" 16 | 17 | dependencies = [ 18 | "ryoma_ai[snowflake, pyspark, sqlite, mysql, bigquery]", 19 | "ryoma_lab", 20 | "pytest >= 6.2.5", 21 | "black >= 23.3.0", 22 | "isort >= 5.9.3", 23 | "mypy >= 0.910", 24 | "flake8 >= 3.9.2", 25 | "click >= 8.0.0", 26 | "pip >= 21.3.1", 27 | "setuptools >= 58.0.4", 28 | "pyupgrade >= 2.26.0", 29 | "pytest-cov>=5.0.0", 30 | "openai-responses>=0.10.0", 31 | "coverage-badge>=1.1.2", 32 | "reflex-chakra>=0.7.0", 33 | ] 34 | 35 | [tool.uv.sources] 36 | ryoma_ai = { workspace = true} 37 | ryoma_lab = { workspace = true} 38 | 39 | [tool.uv.workspace] 40 | members = 
["packages/ryoma_ai", "packages/ryoma_lab"] 41 | 42 | [tool.isort] 43 | profile = "black" 44 | 45 | 46 | [tool.mypy] 47 | allow_redefinition = false 48 | check_untyped_defs = true 49 | disallow_any_generics = true 50 | disallow_incomplete_defs = true 51 | ignore_missing_imports = true 52 | implicit_reexport = false 53 | no_implicit_optional = true 54 | show_column_numbers = true 55 | show_error_codes = true 56 | show_error_context = true 57 | strict_equality = true 58 | strict_optional = true 59 | warn_no_return = true 60 | warn_redundant_casts = true 61 | warn_return_any = true 62 | warn_unreachable = true 63 | warn_unused_configs = true 64 | warn_unused_ignores = true 65 | explicit_package_bases = true 66 | 67 | 68 | [tool.pytest.ini_options] 69 | norecursedirs = ["hooks", "*.egg", ".eggs", "dist", "build", "docs", ".tox", ".git", "__pycache__"] 70 | doctest_optionflags = ["NUMBER", "NORMALIZE_WHITESPACE", "IGNORE_EXCEPTION_DETAIL"] 71 | addopts = [ 72 | "--strict-markers", 73 | "--tb=short", 74 | "--doctest-modules", 75 | "--doctest-continue-on-failure", 76 | ] 77 | 78 | [tool.coverage.run] 79 | source = ["tests"] 80 | 81 | [coverage.paths] 82 | source = "ryoma" 83 | 84 | [coverage.run] 85 | branch = true 86 | 87 | [coverage.report] 88 | fail_under = 50 89 | show_missing = true 90 | 91 | [dependency-groups] 92 | dev = [ 93 | "mypy>=1.11.2", 94 | "pytest>=8.3.3", 95 | "pytest-cov>=5.0.0", 96 | "ruff>=0.12.10", 97 | ] 98 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/styles.py: -------------------------------------------------------------------------------- 1 | """Styles for the app.""" 2 | 3 | import reflex as rx 4 | 5 | global_style = { 6 | ".gridjs-container": { 7 | "font_size": "12px", 8 | } 9 | } 10 | border_radius = "0.375rem" 11 | border = f"1px solid {rx.color('gray', 6)}" 12 | text_color = rx.color("gray", 11) 13 | accent_text_color = rx.color("accent", 10) 14 | accent_color = 
rx.color("accent", 1) 15 | hover_accent_color = {"_hover": {"color": accent_text_color}} 16 | hover_accent_bg = {"_hover": {"background_color": accent_color}} 17 | content_width_vw = "90vw" 18 | sidebar_width = "18em" 19 | 20 | template_page_style = { 21 | "padding_top": "5em", 22 | "padding_x": ["auto", "0.5em"], 23 | "flex": "1", 24 | "overflow_x": "hidden", 25 | } 26 | 27 | template_content_style = { 28 | "border_radius": border_radius, 29 | "margin_bottom": "2em", 30 | "min_height": "90vh", 31 | } 32 | 33 | link_style = { 34 | "color": accent_text_color, 35 | "text_decoration": "none", 36 | **hover_accent_color, 37 | } 38 | 39 | overlapping_button_style = { 40 | "background_color": "white", 41 | "border_radius": border_radius, 42 | } 43 | 44 | markdown_style = { 45 | "h1": lambda text: rx.heading(text, size="5", margin_y="1em"), 46 | "h2": lambda text: rx.heading(text, size="3", margin_y="1em"), 47 | "h3": lambda text: rx.heading(text, size="1", margin_y="1em"), 48 | "p": lambda text: rx.text(text, color="black", margin_y="1em"), 49 | "code": lambda text: rx.code(text, color_scheme="gray"), 50 | "codeblock": lambda text, **props: rx.code_block(text, **props, margin_y="1em"), 51 | "a": lambda text, **props: rx.link( 52 | text, 53 | **props, 54 | color="blue", 55 | _hover={"color": "red"}, 56 | font_weight="bold", 57 | text_decoration="underline", 58 | text_decoration_color=accent_text_color, 59 | ), 60 | "table": lambda el: rx.table.root(el, size="1", width="40em"), 61 | "thead": lambda el: rx.table.header(el, border_bottom=border), 62 | "tr": lambda text: rx.table.row(text, border_bottom=border), 63 | } 64 | 65 | # Common styles for questions and answers. 
66 | shadow = "rgba(0, 0, 0, 0.15) 0px 2px 8px" 67 | chat_margin = "20%" 68 | message_style = dict( 69 | border_radius="5px", 70 | box_shadow=shadow, 71 | display="inline-block", 72 | margin_y="0.5em", 73 | padding_left="1em", 74 | padding_right="1em", 75 | max_width="44em", 76 | ) 77 | -------------------------------------------------------------------------------- /packages/ryoma_lab/ryoma_lab/services/kernel/sqlkernel.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import re 3 | from typing import Any, Dict 4 | 5 | from ryoma_ai.datasource.factory import DataSourceFactory 6 | from ryoma_ai.datasource.sql import SqlDataSource 7 | from ryoma_lab.services.kernel.base import BaseKernel 8 | from sqlalchemy.exc import SQLAlchemyError 9 | 10 | 11 | class SqlKernel(BaseKernel): 12 | datasource: SqlDataSource 13 | 14 | def __init__(self, datasource: SqlDataSource, **kwargs): 15 | if not datasource: 16 | datasource = DataSourceFactory.create_datasource("duckdb") 17 | super().__init__(datasource, **kwargs) 18 | 19 | def execute(self, query: str) -> Dict[str, Any]: 20 | logging.info(f"Executing SQL query: {query}") 21 | 22 | try: 23 | df = self.datasource.query(query) 24 | return { 25 | "output_type": "dataframe", 26 | "data": df, 27 | } 28 | except SQLAlchemyError as e: 29 | logging.error(f"SQLAlchemy error: {str(e)}") 30 | return self._create_error_response(e) 31 | except Exception as e: 32 | logging.error(f"Unexpected error: {str(e)}") 33 | return self._create_error_response(e) 34 | 35 | def _extract_datasource_from_query(self, query: str) -> str: 36 | # This regex looks for table names in common SQL patterns 37 | pattern = r'\bFROM\s+"?(\w+)"?|\bJOIN\s+"?(\w+)"?' 
38 | matches = re.findall(pattern, query, re.IGNORECASE) 39 | # Flatten and filter the matches 40 | datasources = [ds for match in matches for ds in match if ds] 41 | return datasources[0] if datasources else None 42 | 43 | def _get_datasource(self, name: str) -> SqlDataSource: 44 | datasource = self.datasources.get(name) 45 | if datasource: 46 | logging.info(f"Found type: {name}") 47 | else: 48 | logging.warning(f"Datasource not found: {name}") 49 | return datasource 50 | 51 | def _remove_datasource_from_query(self, query: str, datasource_name: str) -> str: 52 | # Remove the type name from the query 53 | pattern = r"\b" + re.escape(datasource_name) + r"\." 54 | return re.sub(pattern, "", query, flags=re.IGNORECASE) 55 | 56 | def set_datasources(self, datasources: Dict[str, SqlDataSource]): 57 | self.datasources = datasources 58 | logging.info(f"Updated datasources: {list(self.datasources.keys())}") 59 | -------------------------------------------------------------------------------- /tests/e2e/ryoma_ai/test_agent.py: -------------------------------------------------------------------------------- 1 | """ 2 | End-to-end tests for agents. 3 | 4 | These tests can use either: 5 | 1. OpenAI API (requires OPENAI_API_KEY environment variable) 6 | 2. 
GPT4All local model (requires gpt4all package and will download model on first run) 7 | """ 8 | 9 | import os 10 | 11 | import pytest 12 | from ryoma_ai.agent.chat_agent import ChatAgent 13 | from ryoma_ai.agent.sql import SqlAgent 14 | 15 | # Determine which backend to use 16 | USE_OPENAI = ( 17 | os.environ.get("OPENAI_API_KEY") 18 | and os.environ.get("USE_OPENAI_FOR_TESTS", "").lower() == "true" 19 | ) 20 | USE_GPT4ALL = ( 21 | not USE_OPENAI and os.environ.get("USE_GPT4ALL_FOR_TESTS", "").lower() == "true" 22 | ) 23 | 24 | if USE_GPT4ALL: 25 | # Only import if we're using it 26 | pytest.importorskip("gpt4all") 27 | MODEL = "gpt4all:Llama-3.2-1B-Instruct-Q4_0.gguf" 28 | elif USE_OPENAI: 29 | MODEL = "gpt-3.5-turbo" 30 | else: 31 | # Skip all tests if no backend is configured 32 | pytest.skip( 33 | "No LLM backend configured. Set USE_OPENAI_FOR_TESTS=true or USE_GPT4ALL_FOR_TESTS=true", 34 | allow_module_level=True, 35 | ) 36 | 37 | 38 | def test_base_agent(): 39 | """Test ChatAgent with configured model.""" 40 | ryoma_agent = ChatAgent(MODEL) 41 | 42 | # Test with a simple query - disable display to capture results 43 | result = ryoma_agent.stream("What is 2 + 2?", display=False) 44 | assert result is not None 45 | 46 | # Collect streamed results 47 | responses = list(result) 48 | assert len(responses) > 0 49 | 50 | # Check that we got some response content 51 | response_text = "".join( 52 | str(r.content if hasattr(r, "content") else str(r)) for r in responses 53 | ) 54 | assert len(response_text) > 0 55 | 56 | 57 | def test_workflow_agent(): 58 | """Test SqlAgent with configured model.""" 59 | ryoma_agent = SqlAgent(MODEL) 60 | 61 | # Test with a simple SQL-related query - disable display to capture results 62 | result = ryoma_agent.stream( 63 | "Show me a simple SQL query to select all records from a table", display=False 64 | ) 65 | assert result is not None 66 | 67 | # Collect streamed results 68 | responses = list(result) 69 | assert len(responses) > 0 70 
class DuckDBDataSource(SqlDataSource):
    """SQL datasource backed by a DuckDB database (file-based or in-memory).

    Connections honor ``read_only``, an optional temp directory, extra DuckDB
    configuration options, and a list of extensions loaded on each connect.
    """

    def __init__(
        self,
        database: str = ":memory:",
        read_only: bool = False,
        temp_directory: Optional[str] = None,
        extensions: Optional[list] = None,
        config: Optional[dict] = None,
        **kwargs,
    ):
        """Create the datasource.

        Args:
            database: DuckDB database file path, or ``:memory:`` for in-memory.
            read_only: Open the database in read-only mode.
            temp_directory: Directory for DuckDB temp files (folded into config).
            extensions: DuckDB extensions to load on every new connection.
            config: Extra DuckDB configuration options.
            **kwargs: Forwarded to ``SqlDataSource``.
        """
        super().__init__(database=database, **kwargs)
        self.read_only = read_only
        # Copy so we never mutate a dict owned by the caller when adding
        # temp_directory below.
        self.config = dict(config) if config else {}
        if temp_directory:
            self.config["temp_directory"] = temp_directory
        self.extensions = extensions

    def get_query_plan(self, query: str) -> Any:
        """Return DuckDB's plan for *query* as the rows of ``EXPLAIN`` output."""
        conn = self.connect()
        return conn.sql(f"EXPLAIN {query}").fetchall()

    def crawl_catalog(self, **kwargs):
        # Catalog crawling is not implemented for DuckDB; intentionally a no-op.
        pass

    def query(self, query, result_format="pandas", **kwargs) -> Any:
        """Run *query* and return the result as a pandas DataFrame.

        Keyword arguments are registered as named views so the SQL can refer
        to in-memory objects, e.g. ``query("select * from t", t=df)``.
        """
        conn = self.connect()
        # Register kwargs as views. The previous implementation wrote them into
        # inspect.currentframe().f_locals, but f_locals updates on function
        # frames are not guaranteed to persist, so the objects were never
        # reliably visible to DuckDB's replacement scans.
        for view_name, value in kwargs.items():
            conn.register(view_name, value)
        return conn.sql(query).fetchdf()

    def register(self, name: str, data: Any, **kwargs):
        """Register *data* (e.g. a pandas DataFrame) as a view named *name*."""
        conn = self.connect()
        conn.register(name, data)

    def _connect(self, **kwargs) -> Any:
        """Open a DuckDB connection and load any configured extensions."""
        conn = duckdb.connect(
            database=self.database,
            read_only=self.read_only,
            config=self.config,
        )
        if self.extensions:
            for extension in self.extensions:
                conn.load_extension(extension)
        return conn
class KernelNode(ToolNode):
    """A ToolNode that delegates tool execution to a custom executor callable.

    The executor is invoked as ``executor(tool, tool_input, config)`` (and
    awaited in the async path); its result becomes the ToolMessage content.
    Sync and async paths share the helpers below so the two stay in lockstep.
    """

    def __init__(
        self,
        tools: Sequence[Union[BaseTool, Callable]],
        executor: Callable,
        *,
        name: str = "kernel_tools",
        tags: Optional[List[str]] = None,
        handle_tool_errors: bool = True,
    ):
        """Wrap *tools* in a ToolNode that runs them through *executor*."""
        super().__init__(
            tools, name=name, tags=tags, handle_tool_errors=handle_tool_errors
        )
        self.executor = executor

    @staticmethod
    def _tool_input(call: ToolCall) -> dict:
        # The executor receives the tool call augmented with its message type.
        return {**call, "type": "tool_call"}

    @staticmethod
    def _success_message(call: ToolCall, result: Any) -> ToolMessage:
        # Stringify the executor result into the tool's reply message.
        return ToolMessage(
            content=str(result), name=call["name"], tool_call_id=call["id"]
        )

    @staticmethod
    def _error_message(call: ToolCall, error: Exception) -> ToolMessage:
        # Surface the failure to the model so it can self-correct.
        content = f"Error: {repr(error)}\n Please fix your mistakes."
        return ToolMessage(content, name=call["name"], tool_call_id=call["id"])

    def _run_one(self, call: ToolCall, config: RunnableConfig) -> ToolMessage:
        """Run a single tool call synchronously through the executor."""
        if invalid_tool_message := self._validate_tool_call(call):
            return invalid_tool_message

        try:
            tool = self.tools_by_name[call["name"]]
            result = self.executor(tool, self._tool_input(call), config)
            return self._success_message(call, result)
        except Exception as e:
            if not self.handle_tool_errors:
                raise
            return self._error_message(call, e)

    async def _arun_one(self, call: ToolCall, config: RunnableConfig) -> ToolMessage:
        """Async twin of ``_run_one``; awaits the executor."""
        if invalid_tool_message := self._validate_tool_call(call):
            return invalid_tool_message

        try:
            tool = self.tools_by_name[call["name"]]
            result = await self.executor(tool, self._tool_input(call), config)
            return self._success_message(call, result)
        except Exception as e:
            if not self.handle_tool_errors:
                raise
            return self._error_message(call, e)
class TransformerClassifier(nn.Module):
    """Decoder-only transformer classifier.

    Embeds token ids, runs them through a causally-masked TransformerDecoder
    (self-attending over the same sequence), and classifies from the final
    token's hidden state. Returns raw logits suitable for CrossEntropyLoss.
    """

    def __init__(self, config: Dict[str, Any], num_labels: int):
        """
        Args:
            config: Requires ``vocab_size``, ``hidden_size``,
                ``num_attention_heads`` and ``num_hidden_layers``.
            num_labels: Number of output classes.
        """
        super().__init__()
        self.config = config
        self.num_labels = num_labels

        self.embd = nn.Embedding(
            num_embeddings=config["vocab_size"],
            embedding_dim=config["hidden_size"],
        )

        # batch_first=True so inputs/outputs are (batch, seq, hidden). The
        # previous default (seq-first) interpreted dim 0 as the sequence axis,
        # so the (seq_len, seq_len) causal mask and the x[:, -1, :] last-token
        # selection were both wrong whenever batch size != sequence length.
        self.decoder_layer = nn.TransformerDecoderLayer(
            d_model=config["hidden_size"],
            nhead=config["num_attention_heads"],
            batch_first=True,
        )

        self.decoder = nn.TransformerDecoder(
            decoder_layer=self.decoder_layer,
            num_layers=config["num_hidden_layers"],
        )

        self.classifier = nn.Linear(config["hidden_size"], num_labels)

    def forward(self, input_ids: torch.Tensor) -> torch.Tensor:
        """Return classification logits of shape (batch, num_labels).

        Raw logits (not softmax probabilities) are returned because
        ``nn.CrossEntropyLoss`` applies log-softmax internally; the previous
        version's trailing softmax double-applied it and hurt training, while
        argmax-based prediction is unaffected by dropping it.
        """
        embedded_seq = self.embd(input_ids)

        # Causal mask over the target sequence dimension.
        tgt_mask = nn.Transformer.generate_square_subsequent_mask(input_ids.size(1)).to(
            input_ids.device
        )
        # NOTE(review): cross-attention over the same sequence (memory) is
        # unmasked, as in the original — confirm whether memory_mask should
        # also be causal for strict decoder-only behavior.
        x = self.decoder(embedded_seq, embedded_seq, tgt_mask=tgt_mask)
        return self.classifier(x[:, -1, :])
def render_output_item(
    item: Union[
        CellOutput,
        StreamOutput,
        ExecuteResultOutput,
        DataframeOutput,
        ErrorOutput,
        UnknownOutput,
    ],
) -> rx.Component:
    """Render a single cell output, dispatching on ``item.output_type``.

    Dispatch is a chain of nested ``rx.cond`` components rather than a Python
    if/elif — presumably because ``output_type`` is a reactive Var resolved
    client-side; verify against Reflex's Var semantics before restructuring.
    Unrecognized types fall through to a plain "Unknown output type" text.
    """
    return rx.box(
        rx.cond(
            item.output_type == "stream",
            render_stream_output(item),
            rx.cond(
                item.output_type == "execute_result",
                render_execute_result(item),
                rx.cond(
                    item.output_type == "dataframe",
                    render_dataframe(item),
                    rx.cond(
                        item.output_type == "error",
                        render_error_output(item),
                        # Fallback branch for any unhandled output_type.
                        rx.text("Unknown output type"),
                    ),
                ),
            ),
        )
    )
def render_output(
    output: list[
        Union[
            StreamOutput,
            ExecuteResultOutput,
            DataframeOutput,
            ErrorOutput,
            UnknownOutput,
        ]
    ],
) -> rx.Component:
    """Render a list of cell outputs as a vertical stack.

    Each element is delegated to ``render_output_item`` via ``rx.foreach``.
    """
    return rx.vstack(rx.foreach(output, render_output_item))
def test_cli_import_without_all_dependencies():
    """Test that CLI modules can be imported without all datasource dependencies.

    Mapping a module name to ``None`` in ``sys.modules`` makes any attempt to
    import it raise ImportError, simulating an environment where the optional
    package is absent. ``patch.dict`` snapshots and restores ``sys.modules``
    on exit, so the manual save/delete/restore bookkeeping the previous
    version carried around was redundant.
    """
    optional_deps = ["duckdb", "psycopg", "snowflake", "google.cloud.bigquery"]

    with patch.dict("sys.modules", {dep: None for dep in optional_deps}):
        # These imports should succeed even without optional dependencies.
        from ryoma_ai.cli import main
        from ryoma_ai.cli.app import RyomaAI
        from ryoma_ai.cli.command_handler import CommandHandler

        # Verify imports succeeded.
        assert main is not None
        assert RyomaAI is not None
        assert CommandHandler is not None
class PanelGroup(ResizablePanels):
    """Wrapper for the ``PanelGroup`` component of react-resizable-panels."""

    tag = "PanelGroup"

    alias = "ResizablePanelGroup"

    # Unique id to auto-save the group layout via localStorage
    auto_save_id: rx.Var[str]

    # Group orientation
    direction: rx.Var[LiteralDirection]

    # Fired when the group layout changes; forwards the layout payload.
    on_layout: rx.EventHandler[lambda e0: [e0]]

    # not sure how to make this one work
    # storage: rx.Var[Any]
class MySqlDataSource(SqlDataSource):
    """SQL datasource backed by a MySQL server, connected through ibis."""

    def get_query_plan(self, query: str) -> Any:
        # Query-plan retrieval is not implemented for MySQL.
        pass

    def __init__(
        self,
        database: Optional[str] = None,
        db_schema: Optional[str] = None,
        connection_url: Optional[str] = None,
        username: Optional[str] = None,
        password: Optional[str] = None,
        host: Optional[str] = None,
        port: Optional[int] = None,
    ):
        """Store connection parameters; no connection is opened here."""
        super().__init__(database=database, db_schema=db_schema)
        self.connection_url = connection_url
        self.username = username
        self.password = password
        self.host = host
        self.port = port

    def _connect(self, **kwargs):
        """Open an ibis MySQL connection from the stored credentials."""
        try:
            return ibis.mysql.connect(
                user=self.username,
                password=self.password,
                host=self.host,
                port=self.port,
                database=self.database,
                **kwargs,
            )
        except Exception as e:
            # Delegate to the shared connection-error handler from the base class.
            self._handle_connection_error(e, "mysql")

    def connection_string(self):
        """Build the SQLAlchemy-style connection URL used by the crawler."""
        return f"mysql+mysqlconnector://{self.username}:{self.password}@{self.host}:{self.port}/{self.database}"

    def crawl_catalog(self, loader: Loader, where_clause_suffix: Optional[str] = ""):
        """Extract MySQL metadata into *loader* via a databuilder job."""
        # Imported lazily so the extractor dependency is only needed here.
        from databuilder.extractor.mysql_metadata_extractor import (
            MysqlMetadataExtractor,
        )

        logging.info("Crawling data catalog from Mysql")
        prefix = "extractor.mysql_metadata"
        job_config = ConfigFactory.from_dict(
            {
                f"{prefix}.{MysqlMetadataExtractor.WHERE_CLAUSE_SUFFIX_KEY}": where_clause_suffix,
                f"{prefix}.{MysqlMetadataExtractor.USE_CATALOG_AS_CLUSTER_NAME}": True,
                f"{prefix}.extractor.sqlalchemy.{SQLAlchemyExtractor.CONN_STRING}": self.connection_string(),
            }
        )
        job = DefaultJob(
            conf=job_config,
            task=DefaultTask(extractor=MysqlMetadataExtractor(), loader=loader),
        )
        job.launch()
#
# For the full list of built-in configuration values, see the documentation:
# https://www.sphinx-doc.org/en/master/usage/configuration.html

# -- Project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information

import os
import sys
import warnings

# Make modules that live next to conf.py importable by Sphinx extensions.
sys.path.append(os.path.abspath(os.path.dirname(__file__)))

project = "Ryoma"
copyright = "2024, WuHen-Li"
author = "WuHen-Li"
release = "v1.0.0-beta"

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

extensions = [
    "myst_parser",
    "sphinx_copybutton",
    "sphinx_exec_code",
    "sphinx_tabs.tabs",
    "sphinx.ext.autodoc",
    "sphinx.ext.autosummary",
    "sphinx.ext.coverage",
    "sphinx.ext.doctest",
    "sphinx.ext.githubpages",
    "sphinx.ext.graphviz",
    "sphinx.ext.ifconfig",
    "sphinx.ext.intersphinx",
    "sphinx.ext.mathjax",
    "sphinx.ext.napoleon",
    "sphinx.ext.viewcode",
]

myst_enable_extensions = [
    "amsmath",
    "attrs_inline",
    "colon_fence",
    "deflist",
    "dollarmath",
    "fieldlist",
    "html_admonition",
    "html_image",
    "linkify",
    "replacements",
    "smartquotes",
    "strikethrough",
    "substitution",
    "tasklist",
]

coverage_show_missing_items = True
exclude_patterns = []
graphviz_output_format = "svg"
html_css_files = ["css/custom.css"]
html_favicon = "modelinfer.png"  # NOTE(review): logos below use ryoma.png — confirm this favicon is intended
html_sidebars = {}
html_static_path = ["_static"]
html_theme = "furo"
language = "en"
mathdef_link_only = True
master_doc = "index"
pygments_style = "default"
source_suffix = [".rst", ".md"]
templates_path = ["_templates"]

html_context = {
    "default_mode": "auto",  # auto: the documentation theme will follow the system default that you have set (light or dark)
}

html_theme_options = {
    "light_logo": "ryoma.png",
    "dark_logo": "ryoma.png",
}

intersphinx_mapping = {
    "numpy": ("https://numpy.org/doc/stable/", None),
    # Fixed: the previous f-string interpolated the float literal 3.10, which
    # formats as "3.1" and pointed intersphinx at the Python 3.1 documentation.
    "python": ("https://docs.python.org/3.10/", None),
    "scipy": ("https://docs.scipy.org/doc/scipy/", None),
    "torch": ("https://pytorch.org/docs/stable/", None),
}

# NOTE(review): sphinx_gallery_conf is set but "sphinx_gallery" is not listed
# in extensions above — confirm whether the gallery extension should be enabled.
sphinx_gallery_conf = {
    "examples_dirs": ["examples"],
    "gallery_dirs": ["auto_examples", "auto_tutorial"],
    "capture_repr": ("_repr_html_", "__repr__"),
    "ignore_repr_types": r"matplotlib.text|matplotlib.axes",
}

warnings.filterwarnings("ignore", category=FutureWarning)