├── .github └── workflows │ └── python-publish.yml ├── .gitignore ├── Example.ipynb ├── LICENSE ├── README.md ├── architecture.png ├── jaiqu.png ├── jaiqu ├── __init__.py ├── cli.py ├── helpers.py ├── jaiqu.py └── tests │ ├── __init__.py │ ├── calendar │ ├── event.schema.json │ ├── gcal │ │ └── input.json │ └── outlook │ │ └── input.json │ └── llms │ ├── anthropic │ └── input.json │ ├── arize_openetelemetry │ └── input.json │ ├── errors.schema.json │ ├── llms.schema.json │ └── openai │ └── schema.json ├── pyproject.toml ├── requirements.txt ├── samples ├── data.json └── schema.json ├── streamlit_app.py └── tox.ini /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 
8 | 9 | name: Upload Python Package 10 | 11 | on: 12 | release: 13 | types: [published] 14 | 15 | permissions: 16 | contents: read 17 | id-token: write 18 | 19 | jobs: 20 | deploy: 21 | 22 | runs-on: ubuntu-latest 23 | 24 | steps: 25 | - uses: actions/checkout@v3 26 | - name: Set up Python 27 | uses: actions/setup-python@v3 28 | with: 29 | python-version: '3.x' 30 | - name: Install dependencies 31 | run: | 32 | python -m pip install --upgrade pip 33 | pip install build 34 | - name: Build package 35 | run: python -m build 36 | - name: Publish package 37 | uses: pypa/gh-action-pypi-publish@v1.8.11 38 | with: 39 | user: __token__ 40 | password: ${{ secrets.PYPI_API_TOKEN }} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | 162 | # Ignore .vscode folder 163 | .vscode/ 164 | 165 | # Ignore .DS_Store file 166 | .DS_Store 167 | 168 | # Ignore .env file 169 | .env 170 | 171 | -------------------------------------------------------------------------------- /Example.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import json\n", 10 | "import jq\n", 11 | "from jaiqu import validate_schema, translate_schema" 12 | ] 13 | }, 14 | { 15 | "cell_type": "markdown", 16 | "metadata": {}, 17 | "source": [ 18 | "### Desired data format \n", 19 | "\n", 20 | "Create a `jsonschema` dictionary for the format of data you want. Data extracted from your input will be extracted into this format." 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "schema = {\n", 30 | " \"$schema\": \"http://json-schema.org/draft-07/schema#\",\n", 31 | " \"type\": \"object\",\n", 32 | " \"properties\": {\n", 33 | " \"id\": {\n", 34 | " \"type\": [\"string\", \"null\"],\n", 35 | " \"description\": \"A unique identifier for the record.\"\n", 36 | " },\n", 37 | " \"date\": {\n", 38 | " \"type\": \"string\",\n", 39 | " \"description\": \"A string describing the date.\"\n", 40 | " },\n", 41 | " \"model\": {\n", 42 | " \"type\": \"string\",\n", 43 | " \"description\": \"A text field representing the model used.\"\n", 44 | " }\n", 45 | " },\n", 46 | " \"required\": [\n", 47 | " \"id\",\n", 48 | " \"date\"\n", 49 | " ]\n", 50 | "}" 51 | ] 52 | }, 53 | { 54 | "cell_type": "markdown", 55 | "metadata": {}, 56 | "source": [ 57 | "### Sample input data\n", 58 | "Provide an input JSON dictionary containing the data you want to extract." 
59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "input_json = {\n", 68 | " \"call.id\": \"123\",\n", 69 | " \"datetime\": \"2022-01-01\",\n", 70 | " \"timestamp\": 1640995200,\n", 71 | " \"Address\": \"123 Main St\",\n", 72 | " \"user\": {\n", 73 | " \"name\": \"John Doe\",\n", 74 | " \"age\": 30,\n", 75 | " \"contact\": \"john@email.com\"\n", 76 | " }\n", 77 | "}" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "### (Optional) Create hints\n", 85 | "The jaiqu agent may not know certain concepts. For example, you might want to have some keys interpreted a certain way (i.e. interpret \"contact\" as \"email\"). For tricky interpretations, create hints." 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "key_hints = \"We are processing outputs of an containing an id and a date of a user.\"" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "schema_properties, valid = validate_schema(input_json, schema, key_hints=key_hints, openai_api_key='key')" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "metadata": {}, 109 | "source": [ 110 | "### Verify schema\n", 111 | "Verify the input JSON contains the keys and values requested in your schema" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "print('Schema is valid:',valid)\n", 121 | "print('-'*10)\n", 122 | "print(json.dumps(schema_properties, indent=2))" 123 | ] 124 | }, 125 | { 126 | "cell_type": "code", 127 | "execution_count": null, 128 | "metadata": {}, 129 | "outputs": [], 130 | "source": [ 131 | "jq_query = translate_schema(input_json, schema, key_hints=key_hints, max_retries=20, 
openai_api_key='key')" 132 | ] 133 | }, 134 | { 135 | "cell_type": "markdown", 136 | "metadata": {}, 137 | "source": [ 138 | "## Finalized jq query" 139 | ] 140 | }, 141 | { 142 | "cell_type": "code", 143 | "execution_count": null, 144 | "metadata": {}, 145 | "outputs": [], 146 | "source": [ 147 | "jq_query" 148 | ] 149 | }, 150 | { 151 | "cell_type": "markdown", 152 | "metadata": {}, 153 | "source": [ 154 | "### Check the jq query results" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": null, 160 | "metadata": { 161 | "scrolled": true 162 | }, 163 | "outputs": [], 164 | "source": [ 165 | "result = jq.compile(jq_query).input(input_json).all()\n", 166 | "print(jq.compile(jq_query).input(input_json).all())" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "metadata": {}, 173 | "outputs": [], 174 | "source": [] 175 | } 176 | ], 177 | "metadata": { 178 | "kernelspec": { 179 | "display_name": "Python 3 (ipykernel)", 180 | "language": "python", 181 | "name": "python3" 182 | }, 183 | "language_info": { 184 | "codemirror_mode": { 185 | "name": "ipython", 186 | "version": 3 187 | }, 188 | "file_extension": ".py", 189 | "mimetype": "text/x-python", 190 | "name": "python", 191 | "nbconvert_exporter": "python", 192 | "pygments_lexer": "ipython3", 193 | "version": "3.11.5" 194 | } 195 | }, 196 | "nbformat": 4, 197 | "nbformat_minor": 2 198 | } 199 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 AgentOps 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the 
Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | Jaiqu 3 |

4 |

5 | Natural language to DSL agent for JSON querying 6 |

7 |

8 | 9 | Python 10 | Version 11 | 12 |

13 |

14 | 🔗 Main site 15 |   •   16 | 🐦 Twitter 17 |   •   18 | 📢 Discord 19 |   •   20 | 🖇️ AgentOps 21 |

22 | 23 | # Jaiqu 24 | 25 | [![Streamlit App](https://static.streamlit.io/badges/streamlit_badge_black_white.svg)](https://jaiqu-agent.streamlit.app) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) ![PyPI - Version](https://img.shields.io/pypi/v/jaiqu) 26 | [![X (formerly Twitter) Follow](https://img.shields.io/twitter/follow/AgentOpsAI)](https://x.com/agentopsai) 27 | 28 | 29 | Replicable, AI-generated JSON transformation queries. Transform any JSON into any schema automatically. 30 | 31 | Jaiqu is an AI agent for creating repeatable JSON transforms using [jq query language](https://jqlang.github.io/jq/) syntax. Jaiqu translates any arbitrary JSON inputs into any desired schema. 32 | 33 | Building AI agents? Check out [AgentOps](https://agentops.ai/?jaiqu) 34 | 35 | [Live Demo](https://jaiqu-agent.streamlit.app) 36 | 37 | [Video Overview](https://youtu.be/exbV35-XWA0) 38 | 39 | ![Alt text](architecture.png) 40 | 41 | 42 | ### Features 43 | * **Translate any schema to any schema** AI agent automatically maps data from a source schema to a desired format by iteratively prompting GPT-4 to create valid jq query syntax. 44 | * **Schema validation** Given a requirement schema, automatically validate whether the required data is present in the input json. 45 | * **Fuzzy term matching** Infers keys based on semantic similarity (e.g. datetime vs date_time). GPT-4 automatically maps and translates input keys to desired output keys. 46 | 47 | ### Example usage: 48 | 49 | ```python 50 | from jaiqu import validate_schema, translate_schema 51 | 52 | # Desired data format 53 | schema = { 54 | "$schema": "http://json-schema.org/draft-07/schema#", 55 | "type": "object", 56 | "properties": { 57 | "id": { 58 | "type": ["string", "null"], 59 | "description": "A unique identifier for the record." 60 | }, 61 | "date": { 62 | "type": "string", 63 | "description": "A string describing the date." 
64 | }, 65 | "model": { 66 | "type": "string", 67 | "description": "A text field representing the model used." 68 | } 69 | }, 70 | "required": [ 71 | "id", 72 | "date" 73 | ] 74 | } 75 | 76 | # Provided data 77 | input_json = { 78 | "call.id": "123", 79 | "datetime": "2022-01-01", 80 | "timestamp": 1640995200, 81 | "Address": "123 Main St", 82 | "user": { 83 | "name": "John Doe", 84 | "age": 30, 85 | "contact": "john@email.com" 86 | } 87 | } 88 | 89 | # (Optional) Create hints so the agent knows what to look for in the input 90 | key_hints="We are processing outputs of an containing an id, a date, and a model. All the required fields should be present in this input, but the names might be different." 91 | ``` 92 | 93 | Validating that an input JSON contains all the information required in a schema 94 | ```python 95 | schema_properties, valid = validate_schema(input_json, schema, key_hints=key_hints) 96 | 97 | print(schema_properties) 98 | 99 | >>> { 100 | "id": { 101 | "identified": true, 102 | "key": "call.id", 103 | "message": "123", 104 | "type": [ 105 | "string", 106 | "null" 107 | ], 108 | "description": "A unique identifier for the record.", 109 | "required": true 110 | }, 111 | "date": { 112 | "identified": true, 113 | "key": "datetime", 114 | "message": "2022-01-01", 115 | "type": "string", 116 | "description": "A string describing the date.", 
117 | "required": true 118 | } 119 | } 120 | print(valid) 121 | >>> True 122 | ``` 123 | 124 | Creating a repeatable jq query for extracting data from identically formatted input JSONs 125 | ```python 126 | jq_query = translate_schema(input_json, schema, key_hints=key_hints, max_retries=30) 127 | >>>'{"id": .attributes["call.id"], "date": .datetime}' 128 | ``` 129 | 130 | ### CLI Usage 131 | 132 | ```bash 133 | git clone https://github.com/AgentOps-AI/Jaiqu.git 134 | cd Jaiqu/samples/ 135 | 136 | jaiqu -s schema.json -d data.json 137 | # Validating schema: 100%|███████████████████████████| 3/3 [00:11<00:00, 3.73s/it, Key: model] 138 | # Translating schema: 100%|███████████████████████████| 2/2 [00:02<00:00, 1.46s/it, Key: date] 139 | # Retry attempts: 20%|███████████████████▌ | 2/10 [00:02<00:11, 1.46s/it] 140 | # Validation attempts: 10%|█████████▎ | 1/10 [00:00<00:08, 1.02it/s] 141 | 142 | jq '{ "id": (if .["call.id"] then .["call.id"] else null end), "date": (if has("datetime") then .datetime else "None" end) }' data.json 143 | # Run command? 
144 | # [E]xecute, [A]bort: e 145 | # { 146 | # "id": "123", 147 | # "date": "2022-01-01" 148 | # } 149 | ``` 150 | 151 | > Note: usage is currently limited to python 3.9 & 3.10 152 | 153 | ## Installation 154 | 155 | #### Recommended: [PyPI](https://pypi.org/project/jaiqu/): 156 | 157 | ```bash 158 | pip install jaiqu 159 | ``` 160 | 161 | 162 | ## Architecture 163 | Unraveling the Jaiqu agentic workflow pattern 164 | ```mermaid 165 | flowchart TD 166 | A[Start translate_schema] --> B{Validate input schema} 167 | B -- Valid --> C[For each key, create a jq filter query] 168 | B -- Invalid --> D[Throw RuntimeError] 169 | C --> E[Compile and Test jq Filter] 170 | E -- Success --> F[Validate JSON] 171 | E -- Fail --> G[Retry Create jq Filter] 172 | G -- Success --> E 173 | G -- Fail n times--> H[Throw RuntimeError] 174 | F -- Success --> I[Return jq query string] 175 | F -- Fail --> J[Retry Validate JSON] 176 | J -- Success --> I 177 | J -- Fail n times --> K[Throw RuntimeError] 178 | ``` 179 | 180 | 181 | ## Running tests 182 | 183 | 0. Install `pytest` if you don't have it already 184 | 185 | ```shell 186 | pip install pytest 187 | ``` 188 | 189 | 1. Run the `tests/` folder while in the parent directory 190 | 191 | ```shell 192 | pytest tests 193 | ``` 194 | 195 | This repo also supports `tox`, simply run `python -m tox`. 196 | 197 | ## Contributing 198 | 199 | Contributions to Jaiqu are welcome! Feel free to create an [issue](https://github.com/AgentOps-AI/jaiqu/issues) for any bug reports, complaints, or feature suggestions. 200 | 201 | ## License 202 | 203 | Jaiqu is released under the MIT License. 
204 | -------------------------------------------------------------------------------- /architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgentOps-AI/Jaiqu/a2ce57eddb967e883d0cdbde91507b636230b86a/architecture.png -------------------------------------------------------------------------------- /jaiqu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgentOps-AI/Jaiqu/a2ce57eddb967e883d0cdbde91507b636230b86a/jaiqu.png -------------------------------------------------------------------------------- /jaiqu/__init__.py: -------------------------------------------------------------------------------- 1 | from .jaiqu import validate_schema, translate_schema 2 | -------------------------------------------------------------------------------- /jaiqu/cli.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sys 3 | 4 | import typer 5 | from typer import Option, Typer 6 | from click.types import Choice 7 | 8 | from .jaiqu import translate_schema 9 | from .helpers import run_command 10 | 11 | typer_app = Typer() 12 | 13 | 14 | @typer_app.command() 15 | def jaiqu( 16 | schema_file: str = Option(..., "-s", "--schema", help="Json schema file path"), 17 | data_file: str = Option( 18 | None, 19 | "-d", 20 | "--data", 21 | help="Json data file path. 
if not passed will try to read from stdin", 22 | ), 23 | quiet: bool = Option(False, "-q", "--quiet", help="Quiet mode, only print errors"), 24 | key_hints: str = Option( 25 | None, 26 | "-k", 27 | "--key-hints", 28 | help="Extra prompt for the ai to help it complete the task", 29 | ), 30 | max_retries: int = Option( 31 | 10, 32 | "-r", 33 | "--max-retries", 34 | help="Max number of retries for the ai to complete the task", 35 | ), 36 | ): 37 | """ 38 | Validate and translate a json schema to jq filter 39 | """ 40 | with open(schema_file) as f: 41 | output_schema = json.load(f) 42 | if data_file is None: 43 | if sys.stdin.isatty(): 44 | sys.exit("Error: No data piped to stdin.") 45 | else: 46 | if not quiet: 47 | print("--data not provided, reading from stdin") 48 | data_file = sys.stdin.read() 49 | input_json = json.loads(data_file) 50 | else: 51 | with open(data_file) as f: 52 | input_json = json.load(f) 53 | 54 | query = translate_schema( 55 | output_schema=output_schema, 56 | input_json=input_json, 57 | key_hints=key_hints, 58 | max_retries=max_retries, 59 | quiet=quiet 60 | ) 61 | full_completion = f"jq '{query}' {data_file}" 62 | print(f"\n{full_completion}\nRun command?") 63 | option = typer.prompt( 64 | text="[E]xecute, [A]bort", 65 | type=Choice(("e", "a"), case_sensitive=False), 66 | default="e", 67 | show_choices=False, 68 | show_default=False, 69 | ) 70 | if option in ("e"): 71 | run_command(full_completion) 72 | 73 | 74 | def main(): 75 | typer_app() 76 | 77 | 78 | if __name__ == "__main__": 79 | main() 80 | -------------------------------------------------------------------------------- /jaiqu/helpers.py: -------------------------------------------------------------------------------- 1 | import os 2 | import platform 3 | import shlex 4 | 5 | from openai.types.chat.chat_completion_message_param import ChatCompletionMessageParam 6 | from typing import Optional, Union 7 | from openai import OpenAI 8 | 9 | # jaiqu doesn't seem to consistently work with 
gpt-4-turbo or gpt-4o-mini 10 | OPENAI_MODEL = "gpt-4o" 11 | 12 | 13 | def to_key(response: str) -> Union[str, None]: 14 | """Extract the key from the response.""" 15 | key = response.split('`')[-2] 16 | if key == "None": 17 | return None 18 | return key 19 | 20 | 21 | def identify_key(key, value, input_schema, openai_api_key=None, key_hints=None) -> tuple[Optional[str], str]: 22 | """Identify if a key is present in a schema. This function uses the OpenAI API to generate a response.""" 23 | 24 | system_message = """You are a perfect system designed to validate and extract data from JSON files. 25 | For each field, you provide a short check about your reasoning. Go line by line and do a side by side comparison. For example: 26 | 27 | Schema: 28 | { 29 | "id": "123", 30 | "date": "2022-01-01", 31 | "timestamp": 1640995200, 32 | "Address": "123 Main St", 33 | "input": "hello" 34 | "user": { 35 | "name": "John Doe", 36 | "age": 30, 37 | "contact": "john@email.com" 38 | } 39 | } 40 | 41 | "id" | "id" : The field name is identical. Extracted key: `id` 42 | "Date" | "date" : The field name is the same except for capitalization. Extracted key: `date` 43 | "time" | "timestamp": This is the same concept, therefore it counts. Extracted key: `timestamp` 44 | "addr" | "Address": This is the same concept and is a , therefore it counts. Extracted key: `Address` 45 | "cats" | None: There no matching or remotely similar fields. Extracted key: `None` 46 | "input" | "input": The names match, but the types are different. Extracted key: `None` 47 | 48 | If we are given hints, we can use them to help us determine if a key is present. For example, if the hint states we are searching for emails in a schema where "email" is not present, we can infer: 49 | "email" | "contact": The names are different, but contact implies email. Extracted key: `contact` 50 | 51 | Some fields may not have the exact same names. 
Use your best judgement about the meaning of the field to determine if they should count. 52 | Think of the key you are searching for in relation to other keys in the schema; this may help you determine if the key is present. 53 | The content of the field may also help you determine if the key is present. For example, if you are searching for a date, and the field contains a date, it is likely the key you are searching for. 54 | You come to a definitive conclusion, the name of the key you found, at the end of your response.""" 55 | 56 | if key_hints is not None: 57 | system_message += "\n\nAdditionally, consider the following: " + key_hints 58 | messages: list[ChatCompletionMessageParam] = [{ 59 | "role": "system", 60 | "content": system_message 61 | }, 62 | { 63 | "role": "user", 64 | "content": f"Is `{key}` of type `{value}` present in the desired schema?:\n\n {input_schema}" 65 | }] 66 | 67 | reasoning_response = OpenAI(api_key=openai_api_key).chat.completions.create(messages=messages, 68 | model=OPENAI_MODEL, 69 | ) 70 | completion = str(reasoning_response.choices[0].message.content) 71 | 72 | return to_key(completion), completion 73 | 74 | 75 | def create_jq_string(input_schema, key, value, openai_api_key) -> str: 76 | messages: list[ChatCompletionMessageParam] = [{ 77 | "role": "system", 78 | "content": f"""You are a perfect jq engineer designed to validate and extract data from JSON files using jq. Only reply with code. Do NOT use any natural language. Do NOT use markdown i.e. ```. 79 | 80 | Your task is to create a jq filter to extract the data from the following JSON: 81 | 82 | {input_schema} 83 | 84 | You will be given the type of the key you need to extract. Only extract the key that corresponds to the type. 85 | 86 | * Do NOT extract values based on exact indices. 87 | * Do NOT create default values. 88 | * If the key is not present and it is not required, DO NOT extract it. Return the literal value `None`. 
This is NOT a string, but the actual value `None`. 89 | 90 | """ 91 | }, 92 | { 93 | "role": "user", 94 | "content": f"Write jq to extract the key `{key}`of type `{value['type']}`" 95 | }] 96 | 97 | response = OpenAI(api_key=openai_api_key).chat.completions.create(messages=messages, model=OPENAI_MODEL) 98 | return str(response.choices[0].message.content) 99 | 100 | 101 | def repair_query(query, error, input_schema, openai_api_key): 102 | messages: list[ChatCompletionMessageParam] = [{ 103 | "role": "system", 104 | "content": "You are a perfect jq engineer designed to validate and extract data from JSON files using jq. Only reply with code. Do NOT use any natural language. Do NOT use markdown i.e. ```." 105 | }, 106 | { 107 | "role": "user", 108 | "content": f"""The following query returned an error while extracting from the following schema: 109 | 110 | Query: {query} 111 | 112 | Error: {error} 113 | 114 | Schema: {input_schema}"""}] 115 | response = OpenAI(api_key=openai_api_key).chat.completions.create(messages=messages, 116 | model=OPENAI_MODEL) 117 | return str(response.choices[0].message.content) 118 | 119 | 120 | def dict_to_jq_filter(transformation_dict) -> str: 121 | jq_filter_parts = [] 122 | for new_key, json_path in transformation_dict.items(): 123 | # For each item in the dictionary, create a string '"new_key": json_path' 124 | # Note: json_path is assumed to be a valid jq path expression as a string 125 | jq_filter_parts.append(f'"{new_key}": {json_path}') 126 | 127 | # Join all parts with commas and wrap in braces to form a valid jq object filter 128 | jq_filter = "{ " + ",\n ".join(jq_filter_parts) + " }" 129 | return jq_filter 130 | 131 | 132 | def run_command(command: str) -> None: 133 | """ 134 | Runs a command in the user's shell. 135 | It is aware of the current user's $SHELL. 136 | :param command: A shell command to run. 
137 | """ 138 | if platform.system() == "Windows": 139 | is_powershell = len(os.getenv("PSModulePath", "").split(os.pathsep)) >= 3 140 | full_command = ( 141 | f'powershell.exe -Command "{command}"' 142 | if is_powershell 143 | else f'cmd.exe /c "{command}"' 144 | ) 145 | else: 146 | shell = os.environ.get("SHELL", "/bin/sh") 147 | full_command = f"{shell} -c {shlex.quote(command)}" 148 | 149 | os.system(full_command) 150 | -------------------------------------------------------------------------------- /jaiqu/jaiqu.py: -------------------------------------------------------------------------------- 1 | 2 | import jq 3 | import json 4 | from jsonschema import validate 5 | from tqdm.auto import tqdm # Use the auto submodule for notebook-friendly output if necessary 6 | from .helpers import identify_key, create_jq_string, repair_query, dict_to_jq_filter 7 | 8 | 9 | def validate_schema(input_json: dict, output_schema: dict, openai_api_key: str | None = None, key_hints=None, quiet=False) -> tuple[dict, bool]: 10 | """Validates the schema of the input JSON against the output schema. 11 | Args: 12 | input_json (dict): The input JSON parsed into a dictionary. 13 | output_schema (dict): The output schema against which the input JSON schema needs to be validated. 14 | openai_api_key (str | None, optional): The OpenAI API key. Defaults to None. 15 | key_hints (any, optional): Key hints to assist in identifying keys. Defaults to None. 16 | 17 | Returns: 18 | tuple[dict, bool]: A tuple containing the results of the validation and a boolean indicating if the validation was successful. 
19 | """ 20 | 21 | results = {} 22 | valid = True 23 | with tqdm(total=len(output_schema['properties']), desc="Validating schema", disable=quiet) as pbar: 24 | for key, value in output_schema['properties'].items(): 25 | pbar.set_postfix_str(f"Key: {key}", refresh=True) 26 | response_key, response_reasoning = identify_key(key, value, input_json, openai_api_key, key_hints) 27 | 28 | if response_key is not None: 29 | results[key] = {"identified": True, "key": response_key, 30 | "message": response_reasoning, 31 | **value} 32 | else: 33 | results[key] = {"identified": False, "key": response_key, 34 | "message": response_reasoning, 35 | **value} 36 | if key in output_schema['required']: 37 | results[key]['required'] = True 38 | if results[key]['identified'] == False: 39 | valid = False 40 | else: 41 | results[key]['required'] = False 42 | pbar.update(1) 43 | 44 | return results, valid 45 | 46 | 47 | def translate_schema(input_json, output_schema, openai_api_key: str | None = None, key_hints=None, max_retries=10, quiet=False) -> str: 48 | """ 49 | Translate the input JSON schema into a filtering query using jq. 50 | 51 | Args: 52 | input_json (dict): The input JSON to be reformatted. 53 | output_schema (dict): The desired output schema using standard schema formatting. 54 | openai_api_key (str, optional): OpenAI API key. Defaults to None. 55 | key_hints (None, optional): Hints for translating keys. Defaults to None. 56 | max_retries (int, optional): Maximum number of retries for creating a valid jq filter. Defaults to 10. 57 | 58 | Returns: 59 | str: The filtering query in jq syntax. 60 | 61 | Raises: 62 | RuntimeError: If the input JSON does not contain the required data to satisfy the output schema. 63 | RuntimeError: If failed to create a valid jq filter after maximum retries. 64 | RuntimeError: If failed to validate the jq filter after maximum retries. 
65 | """ 66 | 67 | schema_properties, is_valid = validate_schema(input_json, output_schema, key_hints=key_hints, openai_api_key=openai_api_key, quiet=quiet) 68 | if not is_valid: 69 | raise RuntimeError( 70 | f"The input JSON does not contain the required data to satisfy the output schema: \n\n{json.dumps(schema_properties, indent=2)}") 71 | 72 | filtered_schema = {k: v for k, v in schema_properties.items() if v['identified'] == True} 73 | 74 | filter_query = {} 75 | 76 | with tqdm(total=len(filtered_schema), desc="Translating schema", disable=quiet) as pbar, tqdm(total=max_retries, desc="Retry attempts", disable=quiet) as pbar_retries: 77 | for key, value in filtered_schema.items(): 78 | pbar.set_postfix_str(f"Key: {key}", refresh=True) 79 | jq_string = create_jq_string(input_json, key, value, openai_api_key) 80 | 81 | # If the response is empty, skip the key 82 | if jq_string == "None": 83 | pbar.update(1) 84 | continue 85 | 86 | tries = 0 87 | while True: 88 | try: 89 | key_query = jq.compile(jq_string).input(input_json).all() 90 | break 91 | except Exception as e: 92 | tries += 1 93 | pbar_retries.update(1) 94 | jq_string = repair_query(jq_string, str(e), input_json, openai_api_key) 95 | if tries >= max_retries: 96 | raise RuntimeError( 97 | f"Failed to create a valid jq filter for key '{key}' after {max_retries} retries.") 98 | pbar.update(1) 99 | filter_query[key] = jq_string 100 | pbar.close() 101 | pbar_retries.close() 102 | complete_filter = dict_to_jq_filter(filter_query) 103 | # Validate JSON 104 | tries = 0 105 | with tqdm(total=max_retries, desc="Validation attempts", disable=quiet) as pbar_validation: 106 | while True: 107 | try: 108 | result = jq.compile(complete_filter).input(input_json).all()[0] 109 | validate(instance=result, schema=output_schema) 110 | pbar_validation.close() 111 | break 112 | except Exception as e: 113 | tries += 1 114 | pbar_validation.update(1) 115 | if tries >= max_retries: 116 | raise RuntimeError(f"Failed to validate the jq 
filter after {max_retries} retries.") 117 | complete_filter = repair_query(complete_filter, str(e), input_json, openai_api_key) 118 | pbar.close() 119 | return complete_filter 120 | -------------------------------------------------------------------------------- /jaiqu/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AgentOps-AI/Jaiqu/a2ce57eddb967e883d0cdbde91507b636230b86a/jaiqu/tests/__init__.py -------------------------------------------------------------------------------- /jaiqu/tests/calendar/event.schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "type": "object", 4 | "properties": { 5 | "event_name": { 6 | "type": "string", 7 | "description": "The title of the event" 8 | }, 9 | "event_description": { 10 | "type": "string", 11 | "description": "Descriptive details of the event" 12 | }, 13 | "location": { 14 | "type": "string", 15 | "description": "Where is the event" 16 | } 17 | }, 18 | "required": [ 19 | "event_name", 20 | "location" 21 | 22 | ] 23 | } -------------------------------------------------------------------------------- /jaiqu/tests/calendar/gcal/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "kind": "calendar#event", 3 | "etag": "\"317317317317317317\"", 4 | "id": "123abc456def", 5 | "status": "confirmed", 6 | "htmlLink": "https://www.google.com/calendar/event?eid=123abc456def", 7 | "created": "2024-02-10T12:00:00Z", 8 | "updated": "2024-02-10T15:00:00Z", 9 | "summary": "Project Launch Meeting", 10 | "description": "Initial meeting to discuss the roadmap and deliverables for the new project.", 11 | "location": "Conference Room B, 123 Business Rd, City, Country", 12 | "colorId": "9", 13 | "creator": { 14 | "id": "creator01", 15 | "email": "creator@example.com", 16 | "displayName": "Alex Smith", 
17 | "self": false 18 | }, 19 | "organizer": { 20 | "id": "org01", 21 | "email": "organizer@example.com", 22 | "displayName": "Company ABC", 23 | "self": true 24 | }, 25 | "start": { 26 | "date": null, 27 | "dateTime": "2024-02-15T09:00:00Z", 28 | "timeZone": "America/New_York" 29 | }, 30 | "end": { 31 | "date": null, 32 | "dateTime": "2024-02-15T10:00:00Z", 33 | "timeZone": "America/New_York" 34 | }, 35 | "endTimeUnspecified": false, 36 | "recurrence": [ 37 | "RRULE:FREQ=WEEKLY;COUNT=4" 38 | ], 39 | "recurringEventId": "", 40 | "originalStartTime": { 41 | "date": null, 42 | "dateTime": "2024-02-15T09:00:00Z", 43 | "timeZone": "America/New_York" 44 | }, 45 | "transparency": "opaque", 46 | "visibility": "public", 47 | "iCalUID": "123abc456def@google.com", 48 | "sequence": 0, 49 | "attendees": [ 50 | { 51 | "id": "attendee01", 52 | "email": "attendee1@example.com", 53 | "displayName": "Jordan Doe", 54 | "organizer": false, 55 | "self": false, 56 | "resource": false, 57 | "optional": false, 58 | "responseStatus": "needsAction", 59 | "comment": "", 60 | "additionalGuests": 0 61 | } 62 | ], 63 | "attendeesOmitted": false, 64 | "extendedProperties": { 65 | "private": { 66 | "projectCode": "PRJ12345" 67 | }, 68 | "shared": { 69 | "agendaLink": "https://example.com/meeting-agenda" 70 | } 71 | }, 72 | "hangoutLink": "https://meet.google.com/abc-defg-hij", 73 | "conferenceData": { 74 | "createRequest": { 75 | "requestId": "xyz123abc456", 76 | "conferenceSolutionKey": { 77 | "type": "hangoutsMeet" 78 | }, 79 | "status": { 80 | "statusCode": "success" 81 | } 82 | }, 83 | "entryPoints": [ 84 | { 85 | "entryPointType": "video", 86 | "uri": "https://meet.google.com/abc-defg-hij", 87 | "label": "Google Meet", 88 | "pin": "123456", 89 | "accessCode": "", 90 | "meetingCode": "abc-defg-hij", 91 | "passcode": "", 92 | "password": "" 93 | } 94 | ], 95 | "conferenceSolution": { 96 | "key": { 97 | "type": "hangoutsMeet" 98 | }, 99 | "name": "Google Meet", 100 | "iconUri": 
"https://meet.google.com/icon.png" 101 | }, 102 | "conferenceId": "abc-defg-hij", 103 | "signature": "ABCD1234EF" 104 | }, 105 | "gadget": { 106 | "type": "Google Docs", 107 | "title": "Meeting Agenda", 108 | "link": "https://docs.google.com/document/d/example", 109 | "iconLink": "https://ssl.gstatic.com/docs/doclist/images/icon_11_document_list.png", 110 | "width": 300, 111 | "height": 200, 112 | "display": "chip", 113 | "preferences": { 114 | "documentId": "1A2b3C4d5E6f" 115 | } 116 | }, 117 | "anyoneCanAddSelf": false, 118 | "guestsCanInviteOthers": true, 119 | "guestsCanModify": false, 120 | "guestsCanSeeOtherGuests": true, 121 | "privateCopy": false, 122 | "locked": false, 123 | "reminders": { 124 | "useDefault": true, 125 | "overrides": [] 126 | }, 127 | "source": { 128 | "url": "https://example.com/project-details", 129 | "title": "Project Details Page" 130 | }, 131 | "workingLocationProperties": { 132 | "type": "customLocation", 133 | "homeOffice": null, 134 | "customLocation": { 135 | "label": "Remote" 136 | }, 137 | "officeLocation": { 138 | "buildingId": "", 139 | "floorId": "", 140 | "floorSectionId": "", 141 | "deskId": "", 142 | "label": "" 143 | } 144 | }, 145 | "outOfOfficeProperties": { 146 | "autoDeclineMode": "DECLINED", 147 | "declineMessage": "I will be out of office during this event." 
148 | }, 149 | "focusTimeProperties": { 150 | "autoDeclineMode": "FOCUSED", 151 | "declineMessage": "I am focusing on deep work during this time.", 152 | "chatStatus": "DND" 153 | }, 154 | "attachments": [ 155 | { 156 | "fileUrl": "https://docs.google.com/document/d/example", 157 | "title": "Project Documentation", 158 | "mimeType": "application/vnd.google-apps.document", 159 | "iconLink": "https://ssl.gstatic.com/docs/doclist/images/icon_11_document_list.png", 160 | "fileId": "1A2b3C4d5E6f" 161 | } 162 | ], 163 | "eventType": "default" 164 | } -------------------------------------------------------------------------------- /jaiqu/tests/calendar/outlook/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "@odata.context": "https://graph.microsoft.com/v1.0/$metadata#Collection(microsoft.graph.scheduleInformation)", 3 | "value": [ 4 | { 5 | "scheduleId": "adelev@contoso.onmicrosoft.com", 6 | "availabilityView": "000220000", 7 | "scheduleItems": [ 8 | { 9 | "isPrivate": false, 10 | "status": "busy", 11 | "subject": "Let's go for lunch", 12 | "location": "Harry's Bar", 13 | "start": { 14 | "dateTime": "2019-03-15T12:00:00.0000000", 15 | "timeZone": "Pacific Standard Time" 16 | }, 17 | "end": { 18 | "dateTime": "2019-03-15T14:00:00.0000000", 19 | "timeZone": "Pacific Standard Time" 20 | } 21 | } 22 | ], 23 | "workingHours": { 24 | "daysOfWeek": [ 25 | "monday", 26 | "tuesday", 27 | "wednesday", 28 | "thursday", 29 | "friday" 30 | ], 31 | "startTime": "08:00:00.0000000", 32 | "endTime": "17:00:00.0000000", 33 | "timeZone": { 34 | "name": "Pacific Standard Time" 35 | } 36 | } 37 | }, 38 | { 39 | "scheduleId": "meganb@contoso.onmicrosoft.com", 40 | "availabilityView": "200220010", 41 | "scheduleItems": [ 42 | { 43 | "status": "busy", 44 | "start": { 45 | "dateTime": "2019-03-15T08:30:00.0000000", 46 | "timeZone": "Pacific Standard Time" 47 | }, 48 | "end": { 49 | "dateTime": "2019-03-15T09:30:00.0000000", 50 | "timeZone": 
"Pacific Standard Time" 51 | } 52 | }, 53 | { 54 | "status": "busy", 55 | "start": { 56 | "dateTime": "2019-03-15T12:00:00.0000000", 57 | "timeZone": "Pacific Standard Time" 58 | }, 59 | "end": { 60 | "dateTime": "2019-03-15T14:00:00.0000000", 61 | "timeZone": "Pacific Standard Time" 62 | } 63 | }, 64 | { 65 | "status": "tentative", 66 | "start": { 67 | "dateTime": "2019-03-15T12:00:00.0000000", 68 | "timeZone": "Pacific Standard Time" 69 | }, 70 | "end": { 71 | "dateTime": "2019-03-15T13:00:00.0000000", 72 | "timeZone": "Pacific Standard Time" 73 | } 74 | }, 75 | { 76 | "status": "busy", 77 | "start": { 78 | "dateTime": "2019-03-15T13:00:00.0000000", 79 | "timeZone": "Pacific Standard Time" 80 | }, 81 | "end": { 82 | "dateTime": "2019-03-15T14:00:00.0000000", 83 | "timeZone": "Pacific Standard Time" 84 | } 85 | }, 86 | { 87 | "status": "tentative", 88 | "start": { 89 | "dateTime": "2019-03-15T16:00:00.0000000", 90 | "timeZone": "Pacific Standard Time" 91 | }, 92 | "end": { 93 | "dateTime": "2019-03-15T17:00:00.0000000", 94 | "timeZone": "Pacific Standard Time" 95 | } 96 | } 97 | ], 98 | "workingHours": { 99 | "daysOfWeek": [ 100 | "monday", 101 | "tuesday", 102 | "wednesday", 103 | "thursday", 104 | "friday" 105 | ], 106 | "startTime": "08:00:00.0000000", 107 | "endTime": "17:00:00.0000000", 108 | "timeZone": { 109 | "@odata.type": "#microsoft.graph.customTimeZone", 110 | "bias": 480, 111 | "name": "Customized Time Zone", 112 | "standardOffset": { 113 | "time": "02:00:00.0000000", 114 | "dayOccurrence": 1, 115 | "dayOfWeek": "sunday", 116 | "month": 11, 117 | "year": 0 118 | }, 119 | "daylightOffset": { 120 | "daylightBias": -60, 121 | "time": "02:00:00.0000000", 122 | "dayOccurrence": 2, 123 | "dayOfWeek": "sunday", 124 | "month": 3, 125 | "year": 0 126 | } 127 | } 128 | } 129 | } 130 | ] 131 | } -------------------------------------------------------------------------------- /jaiqu/tests/llms/anthropic/input.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "id": "msg_018vxZcM9a74Zu2UHqH2mfDd", 3 | "content": [ 4 | { 5 | "text": "Sample completion text", 6 | "type": "text" 7 | } 8 | ], 9 | "model": "claude-2.1", 10 | "role": "assistant", 11 | "stop_reason": "end_turn", 12 | "stop_sequence": null, 13 | "type": "message", 14 | "usage": { 15 | "input_tokens": 27, 16 | "output_tokens": 22 17 | } 18 | } -------------------------------------------------------------------------------- /jaiqu/tests/llms/arize_openetelemetry/input.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "llm", 3 | "context": { 4 | "trace_id": "409df945-e058-4829-b240-cfbdd2ff4488", 5 | "span_id": "01fa9612-01b8-4358-85d6-e3e067305ec3" 6 | }, 7 | "span_kind": "LLM", 8 | "parent_id": "2fe8a793-2cf1-42d7-a1df-bd7d46e017ef", 9 | "start_time": "2024-01-11T16:45:17.982858-07:00", 10 | "end_time": "2024-01-11T16:45:18.517639-07:00", 11 | "status_code": "OK", 12 | "status_message": "", 13 | "attributes": { 14 | "llm.input_messages": [ 15 | { 16 | "message.role": "system", 17 | "message.content": "You are a Shakespearean writing assistant who speaks in a Shakespearean style. You help people come up with creative ideas and content like stories, poems, and songs that use Shakespearean style of writing style, including words like \"thou\" and \"hath\u201d.\nHere are some example of Shakespeare's style:\n - Romeo, Romeo! Wherefore art thou Romeo?\n - Love looks not with the eyes, but with the mind; and therefore is winged Cupid painted blind.\n - Shall I compare thee to a summer's day? 
Thou art more lovely and more temperate.\n" 18 | }, 19 | { "message.role": "user", "message.content": "what is 23 times 87" } 20 | ], 21 | "llm.model_name": "gpt-3.5-turbo-0613", 22 | "llm.invocation_parameters": "{\"model\": \"gpt-3.5-turbo-0613\", \"temperature\": 0.1, \"max_tokens\": null}", 23 | "output.value": "{\"tool_calls\": [{\"id\": \"call_Re47Qyh8AggDGEEzlhb4fu7h\", \"function\": {\"arguments\": \"{\\n \\\"a\\\": 23,\\n \\\"b\\\": 87\\n}\", \"name\": \"multiply\"}, \"type\": \"function\"}]}", 24 | "output.mime_type": "application/json", 25 | "llm.output_messages": [ 26 | { 27 | "message.role": "assistant", 28 | "message.tool_calls": [ 29 | { 30 | "tool_call.function.name": "multiply", 31 | "tool_call.function.arguments": "{\n \"a\": 23,\n \"b\": 87\n}" 32 | } 33 | ] 34 | } 35 | ], 36 | "llm.token_count.prompt": 229, 37 | "llm.token_count.completion": 21, 38 | "llm.token_count.total": 250 39 | }, 40 | "events": [] 41 | } -------------------------------------------------------------------------------- /jaiqu/tests/llms/errors.schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "type": "object", 4 | "properties": { 5 | "exception.type": { 6 | "type": "string", 7 | "description": "The type of exception that was thrown" 8 | }, 9 | "exception.message": { 10 | "type": "string", 11 | "description": "Detailed message describing the exception" 12 | }, 13 | "exception.escaped": { 14 | "type": "boolean", 15 | "description": "Indicator if the exception has escaped the span's scope" 16 | }, 17 | "exception.stacktrace": { 18 | "type": "string", 19 | "description": "The stack trace of the exception" 20 | } 21 | }, 22 | "required": [ 23 | "exception.type", 24 | "exception.message", 25 | "exception.escaped", 26 | "exception.stacktrace" 27 | ] 28 | } -------------------------------------------------------------------------------- /jaiqu/tests/llms/llms.schema.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "type": "object", 4 | "properties": { 5 | "id": { 6 | "type": [ 7 | "string", 8 | "null" 9 | ], 10 | "description": "A unique identifier for the record." 11 | }, 12 | "thread_id": { 13 | "type": "string", 14 | "format": "uuid", 15 | "description": "A UUID representing the thread." 16 | }, 17 | "output": { 18 | "type": "string", 19 | "description": "The output value of an operation" 20 | }, 21 | "input": { 22 | "type": "string", 23 | "description": "The input value to an operation" 24 | }, 25 | "invocation_parameters": { 26 | "type": "string", 27 | "description": "Parameters used during the invocation of an LLM or API" 28 | }, 29 | "prompt": { 30 | "type": "array", 31 | "items": { 32 | "type": "object", 33 | "properties": { 34 | "message.role": { 35 | "type": "string" 36 | }, 37 | "message.content": { 38 | "type": "string" 39 | } 40 | }, 41 | "required": [ 42 | "message.role", 43 | "message.content" 44 | ] 45 | }, 46 | "description": "List of messages sent to the LLM in a chat API request" 47 | }, 48 | "completion": { 49 | "type": "array", 50 | "items": { 51 | "type": "object", 52 | "properties": { 53 | "message.role": { 54 | "type": "string" 55 | }, 56 | "message.content": { 57 | "type": "string" 58 | } 59 | }, 60 | "required": [ 61 | "message.role", 62 | "message.content" 63 | ] 64 | }, 65 | "description": "List of messages received from the LLM in a chat API request" 66 | }, 67 | "prompt_tokens": { 68 | "type": "integer", 69 | "description": "The number of tokens in the prompt" 70 | }, 71 | "completion_tokens": { 72 | "type": "integer", 73 | "description": "The number of tokens in the completion" 74 | }, 75 | "total_tokens": { 76 | "type": "integer", 77 | "description": "Total number of tokens, including prompt and completion" 78 | }, 79 | "tool_calls": { 80 | "type": "array", 81 | "items": { 82 | "type": "object", 
83 | "properties": { 84 | "tool_call.function.name": { 85 | "type": "string" 86 | } 87 | }, 88 | "required": [ 89 | "tool_call.function.name" 90 | ] 91 | }, 92 | "description": "List of tool calls (e.g. function calls) generated by the LLM" 93 | }, 94 | "model_name": { 95 | "type": "string", 96 | "description": "The name of the language model being utilized" 97 | }, 98 | "tool.name": { 99 | "type": "string", 100 | "description": "The name of the tool being utilized" 101 | }, 102 | "tool.description": { 103 | "type": "string", 104 | "description": "Description of the tool's purpose and functionality" 105 | }, 106 | "tool.parameters": { 107 | "type": "string", 108 | "description": "The parameters definition for invoking the tool" 109 | }, 110 | "metadata": { 111 | "type": "string", 112 | "description": "Metadata associated with a span" 113 | }, 114 | "init_timestamp": { 115 | "type": "string", 116 | "format": "date-time", 117 | "description": "The initial timestamp when the operation was started" 118 | }, 119 | "end_timestamp": { 120 | "type": "string", 121 | "format": "date-time", 122 | "description": "The ending timestamp when the operation was completed" 123 | } 124 | }, 125 | "required": [ 126 | "output", 127 | "input", 128 | "invocation_parameters", 129 | "prompt", 130 | "completion", 131 | "model_name", 132 | "init_timestamp", 133 | "end_timestamp" 134 | ] 135 | } -------------------------------------------------------------------------------- /jaiqu/tests/llms/openai/schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "type": "object", 4 | "properties": { 5 | "id": { 6 | "type": ["string", "null"], 7 | "description": "A unique identifier for the record." 8 | }, 9 | "thread_id": { 10 | "type": "string", 11 | "format": "uuid", 12 | "description": "A UUID representing the thread." 
13 | }, 14 | "completion": { 15 | "type": ["array", "string"], 16 | "description": "A JSONB object representing the completion." 17 | }, 18 | "model": { 19 | "type": "string", 20 | "description": "A text field representing the model used." 21 | }, 22 | "prompt_tokens": { 23 | "type": "number", 24 | "description": "A numeric field representing the number of tokens in the prompt." 25 | }, 26 | "completion_tokens": { 27 | "type": "number", 28 | "description": "A numeric field representing the number of tokens in the completion." 29 | }, 30 | "init_timestamp": { 31 | "type": "string", 32 | "format": "date-time", 33 | "description": "A timestamp with time zone indicating the initialization time." 34 | } 35 | }, 36 | "required": [ 37 | "id", 38 | "completion", 39 | "model" 40 | ] 41 | } -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.0", "wheel"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "jaiqu" 7 | version = "0.0.6" 8 | authors = [ 9 | { name = "Alex Reibman", email = "areibman@gmail.com" }, 10 | { name = "Howard Gil", email = "howardbgil@gmail.com" }, 11 | { name = "Braelyn Boynton", email = "bboynton97@gmail.com" } 12 | ] 13 | description = "AI utility to extract data from any JSON and reformat it into a new JSON with repeatable queries." 
14 | readme = "README.md" 15 | requires-python = ">=3.7" 16 | classifiers = [ 17 | "Programming Language :: Python :: 3", 18 | "License :: OSI Approved :: MIT License", 19 | "Operating System :: OS Independent", 20 | ] 21 | dependencies = [ 22 | "jq==1.6.0", 23 | "openai>=1.63.2", 24 | "jsonschema==4.21.1", 25 | "typer==0.9.0", 26 | ] 27 | 28 | [project.optional-dependencies] 29 | dev = [ 30 | "pytest>=7.4.4", 31 | "flake8>=3.1.0", 32 | "coverage[toml]>=7.4.0", 33 | ] 34 | 35 | [project.urls] 36 | Homepage = "https://github.com/AgentOps-AI/Jaiqu" 37 | Issues = "https://github.com/AgentOps-AI/Jaiqu/issues" 38 | 39 | [project.entry-points.console_scripts] 40 | jaiqu = "jaiqu.cli:main" 41 | 42 | [tool.setuptools] 43 | packages = { find = { where = ["."], exclude = ["samples"] } } 44 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | jq==1.6.0 2 | openai>=1.12.0,<2.0.0 3 | jsonschema==4.21.1 4 | typer==0.9.0 -------------------------------------------------------------------------------- /samples/data.json: -------------------------------------------------------------------------------- 1 | { 2 | "call.id": "123", 3 | "datetime": "2022-01-01", 4 | "timestamp": 1640995200, 5 | "Address": "123 Main St", 6 | "user": { 7 | "name": "John Doe", 8 | "age": 30, 9 | "contact": "john@email.com" 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /samples/schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "type": "object", 4 | "properties": { 5 | "id": { 6 | "type": ["string", "null"], 7 | "description": "A unique identifier for the record." 8 | }, 9 | "date": { 10 | "type": "string", 11 | "description": "A string describing the date." 
12 | }, 13 | "model": { 14 | "type": "string", 15 | "description": "A text field representing the model used." 16 | } 17 | }, 18 | "required": [ 19 | "id", 20 | "date" 21 | ] 22 | } 23 | -------------------------------------------------------------------------------- /streamlit_app.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import json 3 | import jq 4 | import os 5 | from jaiqu import validate_schema, translate_schema 6 | 7 | # Set page layout to wide 8 | st.set_page_config(layout="wide", page_title="Jaiqu: AI JSON Schema to JQ Query Generator") 9 | 10 | # Custom styles for Streamlit elements 11 | st.markdown( 12 | """ 13 | 21 | """, 22 | unsafe_allow_html=True 23 | ) 24 | 25 | # Title of the app with custom color 26 | st.markdown("

Jaiqu: AI Schema to JQ Query Generator

", 27 | unsafe_allow_html=True) # Added horizontal padding to the title 28 | 29 | st.header('Desired data format') 30 | col1, col2 = st.columns(2) 31 | 32 | with col1: 33 | schema_json = st.text_area('Enter the desired JSON schema', value=json.dumps({ 34 | "$schema": "http://json-schema.org/draft-07/schema#", 35 | "type": "object", 36 | "properties": { 37 | "id": { 38 | "type": ["string", "null"], 39 | "description": "A unique identifier for the record." 40 | }, 41 | "date": { 42 | "type": "string", 43 | "description": "A string describing the date." 44 | }, 45 | "model": { 46 | "type": "string", 47 | "description": "A text field representing the model used." 48 | } 49 | }, 50 | "required": [ 51 | "id", 52 | "date" 53 | ] 54 | }, indent=2), height=200) 55 | 56 | with col2: 57 | input_json_str = st.text_area('Enter the input JSON', value=json.dumps({ 58 | "call.id": "123", 59 | "datetime": "2022-01-01", 60 | "timestamp": 1640995200, 61 | "Address": "123 Main St", 62 | "user": { 63 | "name": "John Doe", 64 | "age": 30, 65 | "contact": "john@email.com" 66 | } 67 | }, indent=2), height=200) 68 | 69 | with col1: 70 | schema = json.loads(schema_json) 71 | st.json(schema, expanded=False) 72 | 73 | with col2: 74 | input_json = json.loads(input_json_str) 75 | st.json(input_json, expanded=False) 76 | 77 | st.markdown("
", # Added horizontal margin to the horizontal line 78 | unsafe_allow_html=True) 79 | 80 | st.header('Optional Inputs') 81 | opt_col1, opt_col2 = st.columns(2) 82 | 83 | with opt_col1: 84 | key_hints = st.text_area('Enter any hints for key mapping', 85 | value="We are processing outputs of an containing an id and a date of a user.", height=100) 86 | 87 | with opt_col2: 88 | max_retries = st.number_input('Set maximum retries for translation', min_value=1, 89 | value=20, format="%d") 90 | openai_api_key = st.text_input('Enter your OpenAI API key', type="password") 91 | 92 | # Validate schema 93 | if st.button('Validate Schema', key="validate_schema"): 94 | 95 | if not openai_api_key: 96 | st.error("Please provide your OpenAI API key to validate the schema.") 97 | st.stop() 98 | 99 | with st.spinner('Validating schema...'): 100 | schema_properties, valid = validate_schema(input_json, schema, 101 | openai_api_key=openai_api_key, key_hints=key_hints) 102 | st.write('Schema is valid:', valid) 103 | st.json(schema_properties, expanded=False) 104 | 105 | # Translate schema 106 | if st.button('Translate Schema', key="translate_schema"): 107 | with st.spinner('Translating schema...'): 108 | 109 | if not openai_api_key: 110 | st.error("Please provide your OpenAI API key to translate the schema.") 111 | st.stop() 112 | 113 | jq_query = translate_schema(input_json, schema, 114 | openai_api_key=openai_api_key, 115 | key_hints=key_hints, max_retries=int(max_retries)) 116 | st.text('Finalized jq query') 117 | st.code(jq_query, language="jq") 118 | 119 | with st.spinner('Checking the jq query results...'): 120 | # Check the jq query results 121 | st.text('JQ query results') 122 | try: 123 | result = jq.compile(jq_query).input(input_json).all()[0] 124 | st.write(result) 125 | except Exception as e: 126 | st.error(f"Error: {e}") 127 | -------------------------------------------------------------------------------- /tox.ini: 
-------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py3, flake8 3 | isolated_build = true 4 | 5 | [testenv] 6 | deps = 7 | pytest 8 | coverage 9 | 10 | [testenv:flake8] 11 | deps = flake8 12 | commands = flake8 jaiqu/ 13 | 14 | [flake8] 15 | max-line-length = 120 16 | per-file-ignores = 17 | jaiqu/__init__.py: F401 --------------------------------------------------------------------------------