├── .github
└── workflows
│ └── python-publish.yml
├── .gitignore
├── Example.ipynb
├── LICENSE
├── README.md
├── architecture.png
├── jaiqu.png
├── jaiqu
├── __init__.py
├── cli.py
├── helpers.py
├── jaiqu.py
└── tests
│ ├── __init__.py
│ ├── calendar
│ ├── event.schema.json
│ ├── gcal
│ │ └── input.json
│ └── outlook
│ │ └── input.json
│ └── llms
│ ├── anthropic
│ └── input.json
│ ├── arize_openetelemetry
│ └── input.json
│ ├── errors.schema.json
│ ├── llms.schema.json
│ └── openai
│ └── schema.json
├── pyproject.toml
├── requirements.txt
├── samples
├── data.json
└── schema.json
├── streamlit_app.py
└── tox.ini
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
1 | # This workflow will upload a Python Package using Twine when a release is created
2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
3 |
4 | # This workflow uses actions that are not certified by GitHub.
5 | # They are provided by a third-party and are governed by
6 | # separate terms of service, privacy policy, and support
7 | # documentation.
8 |
9 | name: Upload Python Package
10 |
11 | on:
12 | release:
13 | types: [published]
14 |
15 | permissions:
16 | contents: read
17 | id-token: write
18 |
19 | jobs:
20 | deploy:
21 |
22 | runs-on: ubuntu-latest
23 |
24 | steps:
25 | - uses: actions/checkout@v3
26 | - name: Set up Python
27 | uses: actions/setup-python@v3
28 | with:
29 | python-version: '3.x'
30 | - name: Install dependencies
31 | run: |
32 | python -m pip install --upgrade pip
33 | pip install build
34 | - name: Build package
35 | run: python -m build
36 | - name: Publish package
37 | uses: pypa/gh-action-pypi-publish@v1.8.11
38 | with:
39 | user: __token__
40 | password: ${{ secrets.PYPI_API_TOKEN }}
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 |
162 | # Ignore .vscode folder
163 | .vscode/
164 |
165 | # Ignore .DS_Store file
166 | .DS_Store
167 |
168 | # Ignore .env file
169 | .env
170 |
171 |
--------------------------------------------------------------------------------
/Example.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": null,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import json\n",
10 | "import jq\n",
11 | "from jaiqu import validate_schema, translate_schema"
12 | ]
13 | },
14 | {
15 | "cell_type": "markdown",
16 | "metadata": {},
17 | "source": [
18 | "### Desired data format \n",
19 | "\n",
20 | "Create a `jsonschema` dictionary for the format of data you want. Data extracted from your input will be extracted into this format."
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": null,
26 | "metadata": {},
27 | "outputs": [],
28 | "source": [
29 | "schema = {\n",
30 | " \"$schema\": \"http://json-schema.org/draft-07/schema#\",\n",
31 | " \"type\": \"object\",\n",
32 | " \"properties\": {\n",
33 | " \"id\": {\n",
34 | " \"type\": [\"string\", \"null\"],\n",
35 | " \"description\": \"A unique identifier for the record.\"\n",
36 | " },\n",
37 | " \"date\": {\n",
38 | " \"type\": \"string\",\n",
39 | " \"description\": \"A string describing the date.\"\n",
40 | " },\n",
41 | " \"model\": {\n",
42 | " \"type\": \"string\",\n",
43 | " \"description\": \"A text field representing the model used.\"\n",
44 | " }\n",
45 | " },\n",
46 | " \"required\": [\n",
47 | " \"id\",\n",
48 | " \"date\"\n",
49 | " ]\n",
50 | "}"
51 | ]
52 | },
53 | {
54 | "cell_type": "markdown",
55 | "metadata": {},
56 | "source": [
57 | "### Sample input data\n",
58 | "Provide an input JSON dictionary containing the data you want to extract."
59 | ]
60 | },
61 | {
62 | "cell_type": "code",
63 | "execution_count": null,
64 | "metadata": {},
65 | "outputs": [],
66 | "source": [
67 | "input_json = {\n",
68 | " \"call.id\": \"123\",\n",
69 | " \"datetime\": \"2022-01-01\",\n",
70 | " \"timestamp\": 1640995200,\n",
71 | " \"Address\": \"123 Main St\",\n",
72 | " \"user\": {\n",
73 | " \"name\": \"John Doe\",\n",
74 | " \"age\": 30,\n",
75 | " \"contact\": \"john@email.com\"\n",
76 | " }\n",
77 | "}"
78 | ]
79 | },
80 | {
81 | "cell_type": "markdown",
82 | "metadata": {},
83 | "source": [
84 | "### (Optional) Create hints\n",
85 | "The jaiqu agent may not know certain concepts. For example, you might want to have some keys interpreted a certain way (i.e. interpret \"contact\" as \"email\"). For tricky interpretations, create hints."
86 | ]
87 | },
88 | {
89 | "cell_type": "code",
90 | "execution_count": null,
91 | "metadata": {},
92 | "outputs": [],
93 | "source": [
94 | "key_hints = \"We are processing outputs of an containing an id and a date of a user.\""
95 | ]
96 | },
97 | {
98 | "cell_type": "code",
99 | "execution_count": null,
100 | "metadata": {},
101 | "outputs": [],
102 | "source": [
103 | "schema_properties, valid = validate_schema(input_json, schema, key_hints=key_hints, openai_api_key='key')"
104 | ]
105 | },
106 | {
107 | "cell_type": "markdown",
108 | "metadata": {},
109 | "source": [
110 | "### Verify schema\n",
111 | "Verify the input JSON contains the keys and values requested in your schema"
112 | ]
113 | },
114 | {
115 | "cell_type": "code",
116 | "execution_count": null,
117 | "metadata": {},
118 | "outputs": [],
119 | "source": [
120 | "print('Schema is valid:',valid)\n",
121 | "print('-'*10)\n",
122 | "print(json.dumps(schema_properties, indent=2))"
123 | ]
124 | },
125 | {
126 | "cell_type": "code",
127 | "execution_count": null,
128 | "metadata": {},
129 | "outputs": [],
130 | "source": [
131 | "jq_query = translate_schema(input_json, schema, key_hints=key_hints, max_retries=20, openai_api_key='key')"
132 | ]
133 | },
134 | {
135 | "cell_type": "markdown",
136 | "metadata": {},
137 | "source": [
138 | "## Finalized jq query"
139 | ]
140 | },
141 | {
142 | "cell_type": "code",
143 | "execution_count": null,
144 | "metadata": {},
145 | "outputs": [],
146 | "source": [
147 | "jq_query"
148 | ]
149 | },
150 | {
151 | "cell_type": "markdown",
152 | "metadata": {},
153 | "source": [
154 | "### Check the jq query results"
155 | ]
156 | },
157 | {
158 | "cell_type": "code",
159 | "execution_count": null,
160 | "metadata": {
161 | "scrolled": true
162 | },
163 | "outputs": [],
164 | "source": [
165 | "result = jq.compile(jq_query).input(input_json).all()\n",
166 | "print(jq.compile(jq_query).input(input_json).all())"
167 | ]
168 | },
169 | {
170 | "cell_type": "code",
171 | "execution_count": null,
172 | "metadata": {},
173 | "outputs": [],
174 | "source": []
175 | }
176 | ],
177 | "metadata": {
178 | "kernelspec": {
179 | "display_name": "Python 3 (ipykernel)",
180 | "language": "python",
181 | "name": "python3"
182 | },
183 | "language_info": {
184 | "codemirror_mode": {
185 | "name": "ipython",
186 | "version": 3
187 | },
188 | "file_extension": ".py",
189 | "mimetype": "text/x-python",
190 | "name": "python",
191 | "nbconvert_exporter": "python",
192 | "pygments_lexer": "ipython3",
193 | "version": "3.11.5"
194 | }
195 | },
196 | "nbformat": 4,
197 | "nbformat_minor": 2
198 | }
199 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 AgentOps
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Natural language to DSL agent for JSON querying
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 | 🔗 Main site
15 | •
16 | 🐦 Twitter
17 | •
18 | 📢 Discord
19 | •
20 | 🖇️ AgentOps
21 |
22 |
23 | # Jaiqu
24 |
25 | []([https://.streamlit.app](https://jaiqu-agent.streamlit.app)) [](https://opensource.org/licenses/MIT) 
26 | [](https://x.com/agentopsai)
27 |
28 |
29 | Replicable, AI-generated JSON transformation queries. Transform any JSON into any schema automatically.
30 |
31 | Jaiqu is an AI agent for creating repeatable JSON transforms using [jq query language](https://jqlang.github.io/jq/) syntax. Jaiqu translates any arbitrary JSON inputs into any desired schema.
32 |
33 | Building AI agents? Check out [AgentOps](https://agentops.ai/?jaiqu)
34 |
35 | [Live Demo](https://jaiqu-agent.streamlit.app)
36 |
37 | [Video Overview](https://youtu.be/exbV35-XWA0)
38 |
39 | 
40 |
41 |
42 | ### Features
43 | * **Translate any schema to any schema** AI agent automatically maps data from a source schema to a desired format by iteratively prompting GPT-4 to create valid jq query syntax.
44 | * **Schema validation** Given a requirement schema, automatically validate whether the required data is present in the input json.
45 | * **Fuzzy term matching** Infers keys based on semantic similarity (e.g. datetime vs date_time). GPT-4 automatically maps and translates input keys to desired output keys.
46 |
47 | ### Example usage:
48 |
49 | ```python
50 | from jaiqu import validate_schema, translate_schema
51 |
52 | # Desired data format
53 | schema = {
54 | "$schema": "http://json-schema.org/draft-07/schema#",
55 | "type": "object",
56 | "properties": {
57 | "id": {
58 | "type": ["string", "null"],
59 | "description": "A unique identifier for the record."
60 | },
61 | "date": {
62 | "type": "string",
63 | "description": "A string describing the date."
64 | },
65 | "model": {
66 | "type": "string",
67 | "description": "A text field representing the model used."
68 | }
69 | },
70 | "required": [
71 | "id",
72 | "date"
73 | ]
74 | }
75 |
76 | # Provided data
77 | input_json = {
78 | "call.id": "123",
79 | "datetime": "2022-01-01",
80 | "timestamp": 1640995200,
81 | "Address": "123 Main St",
82 | "user": {
83 | "name": "John Doe",
84 | "age": 30,
85 | "contact": "john@email.com"
86 | }
87 | }
88 |
89 | # (Optional) Create hints so the agent knows what to look for in the input
90 | key_hints="We are processing outputs of an containing an id, a date, and a model. All the required fields should be present in this input, but the names might be different."
91 | ```
92 |
93 | Validating that an input JSON contains all the information required by a schema
94 | ```python
95 | schema_properties, valid = validate_schema(input_json, schema, key_hints=key_hints)
96 |
97 | print(schema_properties)
98 |
99 | >>> {
100 | "id": {
101 | "identified": true,
102 | "key": "call.id",
103 | "message": "123",
104 | "type": [
105 | "string",
106 | "null"
107 | ],
108 | "description": "A unique identifier for the record.",
109 | "required": true
110 | },
111 | "date": {
112 | "identified": true,
113 | "key": "datetime",
114 | "message": "2022-01-01",
115 | "type": "string",
116 | "description": "A string describing the date.",
117 | "required": true
118 | }
119 | }
120 | print(valid)
121 | >>> True
122 | ```
123 |
124 | Creating a repeatable jq query for extracting data from identically formatted input JSONs
125 | ```python
126 | jq_query = translate_schema(input_json, schema, key_hints=key_hints, max_retries=30)
127 | >>>'{"id": .["call.id"], "date": .datetime}'
128 | ```
129 |
130 | ### CLI Usage
131 |
132 | ```bash
133 | git clone https://github.com/AgentOps-AI/Jaiqu.git
134 | cd Jaiqu/samples/
135 |
136 | jaiqu -s schema.json -d data.json
137 | # Validating schema: 100%|███████████████████████████| 3/3 [00:11<00:00, 3.73s/it, Key: model]
138 | # Translating schema: 100%|███████████████████████████| 2/2 [00:02<00:00, 1.46s/it, Key: date]
139 | # Retry attempts: 20%|███████████████████▌ | 2/10 [00:02<00:11, 1.46s/it]
140 | # Validation attempts: 10%|█████████▎ | 1/10 [00:00<00:08, 1.02it/s]
141 |
142 | jq '{ "id": (if .["call.id"] then .["call.id"] else null end), "date": (if has("datetime") then .datetime else "None" end) }' data.json
143 | # Run command?
144 | # [E]xecute, [A]bort: e
145 | # {
146 | # "id": "123",
147 | # "date": "2022-01-01"
148 | # }
149 | ```
150 |
151 | > Note: usage is currently limited to python 3.9 & 3.10
152 |
153 | ## Installation
154 |
155 | #### Recommended: [PyPI](https://pypi.org/project/jaiqu/):
156 |
157 | ```bash
158 | pip install jaiqu
159 | ```
160 |
161 |
162 | ## Architecture
163 | Unraveling the Jaiqu agentic workflow pattern
164 | ```mermaid
165 | flowchart TD
166 | A[Start translate_schema] --> B{Validate input schema}
167 | B -- Valid --> C[For each key, create a jq filter query]
168 | B -- Invalid --> D[Throw RuntimeError]
169 | C --> E[Compile and Test jq Filter]
170 | E -- Success --> F[Validate JSON]
171 | E -- Fail --> G[Retry Create jq Filter]
172 | G -- Success --> E
173 | G -- Fail n times--> H[Throw RuntimeError]
174 | F -- Success --> I[Return jq query string]
175 | F -- Fail --> J[Retry Validate JSON]
176 | J -- Success --> I
177 | J -- Fail n times --> K[Throw RuntimeError]
178 | ```
179 |
180 |
181 | ## Running tests
182 |
183 | 0. Install `pytest` if you don't have it already
184 |
185 | ```shell
186 | pip install pytest
187 | ```
188 |
189 | 1. Run the `tests/` folder while in the parent directory
190 |
191 | ```shell
192 | pytest tests
193 | ```
194 |
195 | This repo also supports `tox`, simply run `python -m tox`.
196 |
197 | ## Contributing
198 |
199 | Contributions to Jaiqu are welcome! Feel free to create an [issue](https://github.com/AgentOps-AI/jaiqu/issues) for any bug reports, complaints, or feature suggestions.
200 |
201 | ## License
202 |
203 | Jaiqu is released under the MIT License.
204 |
--------------------------------------------------------------------------------
/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AgentOps-AI/Jaiqu/a2ce57eddb967e883d0cdbde91507b636230b86a/architecture.png
--------------------------------------------------------------------------------
/jaiqu.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AgentOps-AI/Jaiqu/a2ce57eddb967e883d0cdbde91507b636230b86a/jaiqu.png
--------------------------------------------------------------------------------
/jaiqu/__init__.py:
--------------------------------------------------------------------------------
1 | from .jaiqu import validate_schema, translate_schema
2 |
--------------------------------------------------------------------------------
/jaiqu/cli.py:
--------------------------------------------------------------------------------
1 | import json
2 | import sys
3 |
4 | import typer
5 | from typer import Option, Typer
6 | from click.types import Choice
7 |
8 | from .jaiqu import translate_schema
9 | from .helpers import run_command
10 |
11 | typer_app = Typer()
12 |
13 |
@typer_app.command()
def jaiqu(
    schema_file: str = Option(..., "-s", "--schema", help="Json schema file path"),
    data_file: str = Option(
        None,
        "-d",
        "--data",
        help="Json data file path. if not passed will try to read from stdin",
    ),
    quiet: bool = Option(False, "-q", "--quiet", help="Quiet mode, only print errors"),
    key_hints: str = Option(
        None,
        "-k",
        "--key-hints",
        help="Extra prompt for the ai to help it complete the task",
    ),
    max_retries: int = Option(
        10,
        "-r",
        "--max-retries",
        help="Max number of retries for the ai to complete the task",
    ),
):
    """
    Validate and translate a json schema to jq filter
    """
    # NOTE: the docstring above is kept verbatim because it doubles as the
    # command's help text; implementation notes live in comments instead.
    #
    # Flow: load the schema, load the input JSON (from --data or stdin), ask
    # translate_schema() for a jq filter, print the resulting jq command and,
    # when a data file is available, offer to execute it.

    # Local import keeps this block self-contained (shlex is standard library).
    import shlex

    with open(schema_file) as f:
        output_schema = json.load(f)

    # Remember where the data came from: the jq command printed below may only
    # reference a file when one was actually given on the command line.
    read_from_stdin = data_file is None
    if read_from_stdin:
        if sys.stdin.isatty():
            sys.exit("Error: No data piped to stdin.")
        if not quiet:
            print("--data not provided, reading from stdin")
        # Do not overwrite data_file with the raw JSON text; it used to leak
        # into the generated command line as if it were a file name.
        input_json = json.loads(sys.stdin.read())
    else:
        with open(data_file) as f:
            input_json = json.load(f)

    query = translate_schema(
        output_schema=output_schema,
        input_json=input_json,
        key_hints=key_hints,
        max_retries=max_retries,
        quiet=quiet
    )

    # Quote the filter (and the path) so quotes or spaces cannot break the
    # command that is shown to the user and handed to the shell.
    quoted_query = shlex.quote(query)

    if read_from_stdin:
        # stdin has been consumed by the data, so there is neither a file to
        # run jq against nor an input stream left for an interactive prompt.
        print(f"\njq {quoted_query}")
        return

    full_completion = f"jq {quoted_query} {shlex.quote(data_file)}"
    print(f"\n{full_completion}\nRun command?")
    option = typer.prompt(
        text="[E]xecute, [A]bort",
        type=Choice(("e", "a"), case_sensitive=False),
        default="e",
        show_choices=False,
        show_default=False,
    )
    # `option in ("e")` was a substring test against the string "e";
    # compare explicitly instead.
    if option.lower() == "e":
        run_command(full_completion)
72 |
73 |
def main():
    """Entry point: run the Typer application defined in this module."""
    typer_app()
76 |
77 |
78 | if __name__ == "__main__":
79 | main()
80 |
--------------------------------------------------------------------------------
/jaiqu/helpers.py:
--------------------------------------------------------------------------------
1 | import os
2 | import platform
3 | import shlex
4 |
5 | from openai.types.chat.chat_completion_message_param import ChatCompletionMessageParam
6 | from typing import Optional, Union
7 | from openai import OpenAI
8 |
9 | # jaiqu doesn't seem to consistently work with gpt-4-turbo or gpt-4o-mini
10 | OPENAI_MODEL = "gpt-4o"
11 |
12 |
def to_key(response: str) -> Union[str, None]:
    """Extract the key from the response.

    The key is the content of the last backtick-delimited span in
    ``response`` (e.g. ``"... Extracted key: `id`"`` yields ``"id"``).

    Args:
        response: Free-form text expected to end with a key wrapped in
            backticks.

    Returns:
        The extracted key, or ``None`` when the span is the literal text
        ``None`` or when the response contains no complete backtick pair.
    """
    pieces = response.split('`')
    # Text between an opening and a closing backtick sits at the odd indices;
    # stopping before the last piece ignores an unterminated trailing span.
    spans = pieces[1:-1:2]
    if not spans:
        # No complete `...` pair: report "no key" instead of raising
        # IndexError on a reply that did not follow the expected format.
        return None
    key = spans[-1]
    if key == "None":
        return None
    return key
19 |
20 |
21 | def identify_key(key, value, input_schema, openai_api_key=None, key_hints=None) -> tuple[Optional[str], str]:
22 | """Identify if a key is present in a schema. This function uses the OpenAI API to generate a response."""
23 |
24 | system_message = """You are a perfect system designed to validate and extract data from JSON files.
25 | For each field, you provide a short check about your reasoning. Go line by line and do a side by side comparison. For example:
26 |
27 | Schema:
28 | {
29 | "id": "123",
30 | "date": "2022-01-01",
31 | "timestamp": 1640995200,
32 | "Address": "123 Main St",
33 | "input": "hello"
34 | "user": {
35 | "name": "John Doe",
36 | "age": 30,
37 | "contact": "john@email.com"
38 | }
39 | }
40 |
41 | "id" | "id" : The field name is identical. Extracted key: `id`
42 | "Date" | "date" : The field name is the same except for capitalization. Extracted key: `date`
43 | "time" | "timestamp": This is the same concept, therefore it counts. Extracted key: `timestamp`
44 | "addr" | "Address": This is the same concept and is a , therefore it counts. Extracted key: `Address`
45 | "cats" | None: There no matching or remotely similar fields. Extracted key: `None`
46 | "input" | "input": The names match, but the types are different. Extracted key: `None`
47 |
48 | If we are given hints, we can use them to help us determine if a key is present. For example, if the hint states we are searching for emails in a schema where "email" is not present, we can infer:
49 | "email" | "contact": The names are different, but contact implies email. Extracted key: `contact`
50 |
51 | Some fields may not have the exact same names. Use your best judgement about the meaning of the field to determine if they should count.
52 | Think of the key you are searching for in relation to other keys in the schema; this may help you determine if the key is present.
53 | The content of the field may also help you determine if the key is present. For example, if you are searching for a date, and the field contains a date, it is likely the key you are searching for.
54 | You come to a definitive conclusion, the name of the key you found, at the end of your response."""
55 |
56 | if key_hints is not None:
57 | system_message += "\n\nAdditionally, consider the following: " + key_hints
58 | messages: list[ChatCompletionMessageParam] = [{
59 | "role": "system",
60 | "content": system_message
61 | },
62 | {
63 | "role": "user",
64 | "content": f"Is `{key}` of type `{value}` present in the desired schema?:\n\n {input_schema}"
65 | }]
66 |
67 | reasoning_response = OpenAI(api_key=openai_api_key).chat.completions.create(messages=messages,
68 | model=OPENAI_MODEL,
69 | )
70 | completion = str(reasoning_response.choices[0].message.content)
71 |
72 | return to_key(completion), completion
73 |
74 |
75 | def create_jq_string(input_schema, key, value, openai_api_key) -> str:
76 | messages: list[ChatCompletionMessageParam] = [{
77 | "role": "system",
78 | "content": f"""You are a perfect jq engineer designed to validate and extract data from JSON files using jq. Only reply with code. Do NOT use any natural language. Do NOT use markdown i.e. ```.
79 |
80 | Your task is to create a jq filter to extract the data from the following JSON:
81 |
82 | {input_schema}
83 |
84 | You will be given the type of the key you need to extract. Only extract the key that corresponds to the type.
85 |
86 | * Do NOT extract values based on exact indices.
87 | * Do NOT create default values.
88 | * If the key is not present and it is not required, DO NOT extract it. Return the literal value `None`. This is NOT a string, but the actual value `None`.
89 |
90 | """
91 | },
92 | {
93 | "role": "user",
94 | "content": f"Write jq to extract the key `{key}`of type `{value['type']}`"
95 | }]
96 |
97 | response = OpenAI(api_key=openai_api_key).chat.completions.create(messages=messages, model=OPENAI_MODEL)
98 | return str(response.choices[0].message.content)
99 |
100 |
101 | def repair_query(query, error, input_schema, openai_api_key):
102 | messages: list[ChatCompletionMessageParam] = [{
103 | "role": "system",
104 | "content": "You are a perfect jq engineer designed to validate and extract data from JSON files using jq. Only reply with code. Do NOT use any natural language. Do NOT use markdown i.e. ```."
105 | },
106 | {
107 | "role": "user",
108 | "content": f"""The following query returned an error while extracting from the following schema:
109 |
110 | Query: {query}
111 |
112 | Error: {error}
113 |
114 | Schema: {input_schema}"""}]
115 | response = OpenAI(api_key=openai_api_key).chat.completions.create(messages=messages,
116 | model=OPENAI_MODEL)
117 | return str(response.choices[0].message.content)
118 |
119 |
def dict_to_jq_filter(transformation_dict) -> str:
    """Render a mapping of output keys to jq expressions as a jq object filter.

    Each item becomes ``"<key>": <expression>``; the expression is inserted
    verbatim, so it is assumed to already be valid jq.

    Args:
        transformation_dict: Mapping of output key name to jq path expression.

    Returns:
        A jq object-construction filter wrapped in braces.
    """
    entries = [f'"{name}": {path}' for name, path in transformation_dict.items()]
    return "{ " + ",\n ".join(entries) + " }"
130 |
131 |
def run_command(command: str) -> None:
    """
    Execute a command through the user's shell via ``os.system``.

    On Windows the command is wrapped for PowerShell when the ``PSModulePath``
    environment variable has at least three entries, otherwise for ``cmd.exe``.
    Elsewhere it is passed, shell-quoted, to ``$SHELL -c`` (``/bin/sh`` when
    ``SHELL`` is unset).

    :param command: A shell command to run.
    """
    if platform.system() != "Windows":
        user_shell = os.environ.get("SHELL", "/bin/sh")
        os.system(f"{user_shell} -c {shlex.quote(command)}")
        return

    # Heuristic taken from the entry count of PSModulePath.
    module_paths = os.getenv("PSModulePath", "").split(os.pathsep)
    if len(module_paths) >= 3:
        wrapped = f'powershell.exe -Command "{command}"'
    else:
        wrapped = f'cmd.exe /c "{command}"'
    os.system(wrapped)
150 |
--------------------------------------------------------------------------------
/jaiqu/jaiqu.py:
--------------------------------------------------------------------------------
1 |
2 | import jq
3 | import json
4 | from jsonschema import validate
5 | from tqdm.auto import tqdm # Use the auto submodule for notebook-friendly output if necessary
6 | from .helpers import identify_key, create_jq_string, repair_query, dict_to_jq_filter
7 |
8 |
def validate_schema(input_json: dict, output_schema: dict, openai_api_key: str | None = None, key_hints=None, quiet=False) -> tuple[dict, bool]:
    """Validates the schema of the input JSON against the output schema.

    For every property of ``output_schema`` the helper ``identify_key`` is
    asked which key of ``input_json`` (if any) corresponds to it.

    Args:
        input_json (dict): The input JSON parsed into a dictionary.
        output_schema (dict): The output schema against which the input JSON schema needs to be validated.
            Must contain ``properties``; ``required`` is optional.
        openai_api_key (str | None, optional): The OpenAI API key. Defaults to None.
        key_hints (any, optional): Key hints to assist in identifying keys. Defaults to None.
        quiet (bool, optional): Disable the progress bar. Defaults to False.

    Returns:
        tuple[dict, bool]: A tuple containing the results of the validation and a boolean indicating if the validation was successful.
        Each result entry holds ``identified``, ``key``, ``message``, the
        property's own schema fields, and ``required``. The boolean is False
        as soon as one required property could not be identified.
    """
    properties = output_schema['properties']
    # "required" is an optional JSON Schema keyword; treat a missing (or null)
    # entry as "nothing is required" instead of raising KeyError.
    required_keys = output_schema.get('required') or []

    results = {}
    valid = True
    with tqdm(total=len(properties), desc="Validating schema", disable=quiet) as pbar:
        for key, value in properties.items():
            pbar.set_postfix_str(f"Key: {key}", refresh=True)
            response_key, response_reasoning = identify_key(key, value, input_json, openai_api_key, key_hints)

            identified = response_key is not None
            results[key] = {"identified": identified, "key": response_key,
                            "message": response_reasoning,
                            **value}

            is_required = key in required_keys
            results[key]['required'] = is_required
            if is_required and not identified:
                valid = False
            pbar.update(1)

    return results, valid
45 |
46 |
def translate_schema(input_json, output_schema, openai_api_key: str | None = None, key_hints=None, max_retries=10, quiet=False) -> str:
    """
    Translate the input JSON schema into a filtering query using jq.

    Steps: validate that the input can satisfy the schema, ask for one jq
    expression per identified key (repairing it until it compiles and runs),
    assemble the expressions into one object filter, then repair that filter
    until its output validates against ``output_schema``.

    Args:
        input_json (dict): The input JSON to be reformatted.
        output_schema (dict): The desired output schema using standard schema formatting.
        openai_api_key (str, optional): OpenAI API key. Defaults to None.
        key_hints (None, optional): Hints for translating keys. Defaults to None.
        max_retries (int, optional): Maximum number of retries for creating a valid jq filter. Defaults to 10.
        quiet (bool, optional): Disable the progress bars. Defaults to False.

    Returns:
        str: The filtering query in jq syntax.

    Raises:
        RuntimeError: If the input JSON does not contain the required data to satisfy the output schema.
        RuntimeError: If failed to create a valid jq filter after maximum retries.
        RuntimeError: If failed to validate the jq filter after maximum retries.
    """

    schema_properties, is_valid = validate_schema(
        input_json, output_schema, key_hints=key_hints, openai_api_key=openai_api_key, quiet=quiet)
    if not is_valid:
        raise RuntimeError(
            f"The input JSON does not contain the required data to satisfy the output schema: \n\n{json.dumps(schema_properties, indent=2)}")

    # Only keys that were located in the input get a jq expression.
    filtered_schema = {k: v for k, v in schema_properties.items() if v['identified']}

    filter_query = {}

    # The context managers close both bars on exit, including on error.
    with tqdm(total=len(filtered_schema), desc="Translating schema", disable=quiet) as pbar, \
            tqdm(total=max_retries, desc="Retry attempts", disable=quiet) as pbar_retries:
        for key, value in filtered_schema.items():
            pbar.set_postfix_str(f"Key: {key}", refresh=True)
            jq_string = create_jq_string(input_json, key, value, openai_api_key)

            # If the response is empty, skip the key
            if jq_string == "None":
                pbar.update(1)
                continue

            tries = 0
            while True:
                try:
                    # Compile and run the expression purely as a smoke test.
                    jq.compile(jq_string).input(input_json).all()
                    break
                except Exception as e:
                    tries += 1
                    pbar_retries.update(1)
                    # Give up before requesting another repair: a repaired
                    # query would never be tested once the budget is spent.
                    if tries >= max_retries:
                        raise RuntimeError(
                            f"Failed to create a valid jq filter for key '{key}' after {max_retries} retries.") from e
                    jq_string = repair_query(jq_string, str(e), input_json, openai_api_key)
            pbar.update(1)
            filter_query[key] = jq_string

    complete_filter = dict_to_jq_filter(filter_query)

    # Validate JSON
    tries = 0
    with tqdm(total=max_retries, desc="Validation attempts", disable=quiet) as pbar_validation:
        while True:
            try:
                result = jq.compile(complete_filter).input(input_json).all()[0]
                validate(instance=result, schema=output_schema)
                break
            except Exception as e:
                tries += 1
                pbar_validation.update(1)
                if tries >= max_retries:
                    raise RuntimeError(f"Failed to validate the jq filter after {max_retries} retries.") from e
                complete_filter = repair_query(complete_filter, str(e), input_json, openai_api_key)
    return complete_filter
120 |
--------------------------------------------------------------------------------
/jaiqu/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AgentOps-AI/Jaiqu/a2ce57eddb967e883d0cdbde91507b636230b86a/jaiqu/tests/__init__.py
--------------------------------------------------------------------------------
/jaiqu/tests/calendar/event.schema.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "http://json-schema.org/draft-07/schema#",
3 | "type": "object",
4 | "properties": {
5 | "event_name": {
6 | "type": "string",
7 | "description": "The title of the event"
8 | },
9 | "event_description": {
10 | "type": "string",
11 | "description": "Descriptive details of the event"
12 | },
13 | "location": {
14 | "type": "string",
15 | "description": "Where is the event"
16 | }
17 | },
18 | "required": [
19 | "event_name",
20 | "location"
21 |
22 | ]
23 | }
--------------------------------------------------------------------------------
/jaiqu/tests/calendar/gcal/input.json:
--------------------------------------------------------------------------------
1 | {
2 | "kind": "calendar#event",
3 | "etag": "\"317317317317317317\"",
4 | "id": "123abc456def",
5 | "status": "confirmed",
6 | "htmlLink": "https://www.google.com/calendar/event?eid=123abc456def",
7 | "created": "2024-02-10T12:00:00Z",
8 | "updated": "2024-02-10T15:00:00Z",
9 | "summary": "Project Launch Meeting",
10 | "description": "Initial meeting to discuss the roadmap and deliverables for the new project.",
11 | "location": "Conference Room B, 123 Business Rd, City, Country",
12 | "colorId": "9",
13 | "creator": {
14 | "id": "creator01",
15 | "email": "creator@example.com",
16 | "displayName": "Alex Smith",
17 | "self": false
18 | },
19 | "organizer": {
20 | "id": "org01",
21 | "email": "organizer@example.com",
22 | "displayName": "Company ABC",
23 | "self": true
24 | },
25 | "start": {
26 | "date": null,
27 | "dateTime": "2024-02-15T09:00:00Z",
28 | "timeZone": "America/New_York"
29 | },
30 | "end": {
31 | "date": null,
32 | "dateTime": "2024-02-15T10:00:00Z",
33 | "timeZone": "America/New_York"
34 | },
35 | "endTimeUnspecified": false,
36 | "recurrence": [
37 | "RRULE:FREQ=WEEKLY;COUNT=4"
38 | ],
39 | "recurringEventId": "",
40 | "originalStartTime": {
41 | "date": null,
42 | "dateTime": "2024-02-15T09:00:00Z",
43 | "timeZone": "America/New_York"
44 | },
45 | "transparency": "opaque",
46 | "visibility": "public",
47 | "iCalUID": "123abc456def@google.com",
48 | "sequence": 0,
49 | "attendees": [
50 | {
51 | "id": "attendee01",
52 | "email": "attendee1@example.com",
53 | "displayName": "Jordan Doe",
54 | "organizer": false,
55 | "self": false,
56 | "resource": false,
57 | "optional": false,
58 | "responseStatus": "needsAction",
59 | "comment": "",
60 | "additionalGuests": 0
61 | }
62 | ],
63 | "attendeesOmitted": false,
64 | "extendedProperties": {
65 | "private": {
66 | "projectCode": "PRJ12345"
67 | },
68 | "shared": {
69 | "agendaLink": "https://example.com/meeting-agenda"
70 | }
71 | },
72 | "hangoutLink": "https://meet.google.com/abc-defg-hij",
73 | "conferenceData": {
74 | "createRequest": {
75 | "requestId": "xyz123abc456",
76 | "conferenceSolutionKey": {
77 | "type": "hangoutsMeet"
78 | },
79 | "status": {
80 | "statusCode": "success"
81 | }
82 | },
83 | "entryPoints": [
84 | {
85 | "entryPointType": "video",
86 | "uri": "https://meet.google.com/abc-defg-hij",
87 | "label": "Google Meet",
88 | "pin": "123456",
89 | "accessCode": "",
90 | "meetingCode": "abc-defg-hij",
91 | "passcode": "",
92 | "password": ""
93 | }
94 | ],
95 | "conferenceSolution": {
96 | "key": {
97 | "type": "hangoutsMeet"
98 | },
99 | "name": "Google Meet",
100 | "iconUri": "https://meet.google.com/icon.png"
101 | },
102 | "conferenceId": "abc-defg-hij",
103 | "signature": "ABCD1234EF"
104 | },
105 | "gadget": {
106 | "type": "Google Docs",
107 | "title": "Meeting Agenda",
108 | "link": "https://docs.google.com/document/d/example",
109 | "iconLink": "https://ssl.gstatic.com/docs/doclist/images/icon_11_document_list.png",
110 | "width": 300,
111 | "height": 200,
112 | "display": "chip",
113 | "preferences": {
114 | "documentId": "1A2b3C4d5E6f"
115 | }
116 | },
117 | "anyoneCanAddSelf": false,
118 | "guestsCanInviteOthers": true,
119 | "guestsCanModify": false,
120 | "guestsCanSeeOtherGuests": true,
121 | "privateCopy": false,
122 | "locked": false,
123 | "reminders": {
124 | "useDefault": true,
125 | "overrides": []
126 | },
127 | "source": {
128 | "url": "https://example.com/project-details",
129 | "title": "Project Details Page"
130 | },
131 | "workingLocationProperties": {
132 | "type": "customLocation",
133 | "homeOffice": null,
134 | "customLocation": {
135 | "label": "Remote"
136 | },
137 | "officeLocation": {
138 | "buildingId": "",
139 | "floorId": "",
140 | "floorSectionId": "",
141 | "deskId": "",
142 | "label": ""
143 | }
144 | },
145 | "outOfOfficeProperties": {
146 | "autoDeclineMode": "DECLINED",
147 | "declineMessage": "I will be out of office during this event."
148 | },
149 | "focusTimeProperties": {
150 | "autoDeclineMode": "FOCUSED",
151 | "declineMessage": "I am focusing on deep work during this time.",
152 | "chatStatus": "DND"
153 | },
154 | "attachments": [
155 | {
156 | "fileUrl": "https://docs.google.com/document/d/example",
157 | "title": "Project Documentation",
158 | "mimeType": "application/vnd.google-apps.document",
159 | "iconLink": "https://ssl.gstatic.com/docs/doclist/images/icon_11_document_list.png",
160 | "fileId": "1A2b3C4d5E6f"
161 | }
162 | ],
163 | "eventType": "default"
164 | }
--------------------------------------------------------------------------------
/jaiqu/tests/calendar/outlook/input.json:
--------------------------------------------------------------------------------
1 | {
2 | "@odata.context": "https://graph.microsoft.com/v1.0/$metadata#Collection(microsoft.graph.scheduleInformation)",
3 | "value": [
4 | {
5 | "scheduleId": "adelev@contoso.onmicrosoft.com",
6 | "availabilityView": "000220000",
7 | "scheduleItems": [
8 | {
9 | "isPrivate": false,
10 | "status": "busy",
11 | "subject": "Let's go for lunch",
12 | "location": "Harry's Bar",
13 | "start": {
14 | "dateTime": "2019-03-15T12:00:00.0000000",
15 | "timeZone": "Pacific Standard Time"
16 | },
17 | "end": {
18 | "dateTime": "2019-03-15T14:00:00.0000000",
19 | "timeZone": "Pacific Standard Time"
20 | }
21 | }
22 | ],
23 | "workingHours": {
24 | "daysOfWeek": [
25 | "monday",
26 | "tuesday",
27 | "wednesday",
28 | "thursday",
29 | "friday"
30 | ],
31 | "startTime": "08:00:00.0000000",
32 | "endTime": "17:00:00.0000000",
33 | "timeZone": {
34 | "name": "Pacific Standard Time"
35 | }
36 | }
37 | },
38 | {
39 | "scheduleId": "meganb@contoso.onmicrosoft.com",
40 | "availabilityView": "200220010",
41 | "scheduleItems": [
42 | {
43 | "status": "busy",
44 | "start": {
45 | "dateTime": "2019-03-15T08:30:00.0000000",
46 | "timeZone": "Pacific Standard Time"
47 | },
48 | "end": {
49 | "dateTime": "2019-03-15T09:30:00.0000000",
50 | "timeZone": "Pacific Standard Time"
51 | }
52 | },
53 | {
54 | "status": "busy",
55 | "start": {
56 | "dateTime": "2019-03-15T12:00:00.0000000",
57 | "timeZone": "Pacific Standard Time"
58 | },
59 | "end": {
60 | "dateTime": "2019-03-15T14:00:00.0000000",
61 | "timeZone": "Pacific Standard Time"
62 | }
63 | },
64 | {
65 | "status": "tentative",
66 | "start": {
67 | "dateTime": "2019-03-15T12:00:00.0000000",
68 | "timeZone": "Pacific Standard Time"
69 | },
70 | "end": {
71 | "dateTime": "2019-03-15T13:00:00.0000000",
72 | "timeZone": "Pacific Standard Time"
73 | }
74 | },
75 | {
76 | "status": "busy",
77 | "start": {
78 | "dateTime": "2019-03-15T13:00:00.0000000",
79 | "timeZone": "Pacific Standard Time"
80 | },
81 | "end": {
82 | "dateTime": "2019-03-15T14:00:00.0000000",
83 | "timeZone": "Pacific Standard Time"
84 | }
85 | },
86 | {
87 | "status": "tentative",
88 | "start": {
89 | "dateTime": "2019-03-15T16:00:00.0000000",
90 | "timeZone": "Pacific Standard Time"
91 | },
92 | "end": {
93 | "dateTime": "2019-03-15T17:00:00.0000000",
94 | "timeZone": "Pacific Standard Time"
95 | }
96 | }
97 | ],
98 | "workingHours": {
99 | "daysOfWeek": [
100 | "monday",
101 | "tuesday",
102 | "wednesday",
103 | "thursday",
104 | "friday"
105 | ],
106 | "startTime": "08:00:00.0000000",
107 | "endTime": "17:00:00.0000000",
108 | "timeZone": {
109 | "@odata.type": "#microsoft.graph.customTimeZone",
110 | "bias": 480,
111 | "name": "Customized Time Zone",
112 | "standardOffset": {
113 | "time": "02:00:00.0000000",
114 | "dayOccurrence": 1,
115 | "dayOfWeek": "sunday",
116 | "month": 11,
117 | "year": 0
118 | },
119 | "daylightOffset": {
120 | "daylightBias": -60,
121 | "time": "02:00:00.0000000",
122 | "dayOccurrence": 2,
123 | "dayOfWeek": "sunday",
124 | "month": 3,
125 | "year": 0
126 | }
127 | }
128 | }
129 | }
130 | ]
131 | }
--------------------------------------------------------------------------------
/jaiqu/tests/llms/anthropic/input.json:
--------------------------------------------------------------------------------
1 | {
2 | "id": "msg_018vxZcM9a74Zu2UHqH2mfDd",
3 | "content": [
4 | {
5 | "text": "Sample completion text",
6 | "type": "text"
7 | }
8 | ],
9 | "model": "claude-2.1",
10 | "role": "assistant",
11 | "stop_reason": "end_turn",
12 | "stop_sequence": null,
13 | "type": "message",
14 | "usage": {
15 | "input_tokens": 27,
16 | "output_tokens": 22
17 | }
18 | }
--------------------------------------------------------------------------------
/jaiqu/tests/llms/arize_openetelemetry/input.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "llm",
3 | "context": {
4 | "trace_id": "409df945-e058-4829-b240-cfbdd2ff4488",
5 | "span_id": "01fa9612-01b8-4358-85d6-e3e067305ec3"
6 | },
7 | "span_kind": "LLM",
8 | "parent_id": "2fe8a793-2cf1-42d7-a1df-bd7d46e017ef",
9 | "start_time": "2024-01-11T16:45:17.982858-07:00",
10 | "end_time": "2024-01-11T16:45:18.517639-07:00",
11 | "status_code": "OK",
12 | "status_message": "",
13 | "attributes": {
14 | "llm.input_messages": [
15 | {
16 | "message.role": "system",
17 | "message.content": "You are a Shakespearean writing assistant who speaks in a Shakespearean style. You help people come up with creative ideas and content like stories, poems, and songs that use Shakespearean style of writing style, including words like \"thou\" and \"hath\u201d.\nHere are some example of Shakespeare's style:\n - Romeo, Romeo! Wherefore art thou Romeo?\n - Love looks not with the eyes, but with the mind; and therefore is winged Cupid painted blind.\n - Shall I compare thee to a summer's day? Thou art more lovely and more temperate.\n"
18 | },
19 | { "message.role": "user", "message.content": "what is 23 times 87" }
20 | ],
21 | "llm.model_name": "gpt-3.5-turbo-0613",
22 | "llm.invocation_parameters": "{\"model\": \"gpt-3.5-turbo-0613\", \"temperature\": 0.1, \"max_tokens\": null}",
23 | "output.value": "{\"tool_calls\": [{\"id\": \"call_Re47Qyh8AggDGEEzlhb4fu7h\", \"function\": {\"arguments\": \"{\\n \\\"a\\\": 23,\\n \\\"b\\\": 87\\n}\", \"name\": \"multiply\"}, \"type\": \"function\"}]}",
24 | "output.mime_type": "application/json",
25 | "llm.output_messages": [
26 | {
27 | "message.role": "assistant",
28 | "message.tool_calls": [
29 | {
30 | "tool_call.function.name": "multiply",
31 | "tool_call.function.arguments": "{\n \"a\": 23,\n \"b\": 87\n}"
32 | }
33 | ]
34 | }
35 | ],
36 | "llm.token_count.prompt": 229,
37 | "llm.token_count.completion": 21,
38 | "llm.token_count.total": 250
39 | },
40 | "events": []
41 | }
--------------------------------------------------------------------------------
/jaiqu/tests/llms/errors.schema.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "http://json-schema.org/draft-07/schema#",
3 | "type": "object",
4 | "properties": {
5 | "exception.type": {
6 | "type": "string",
7 | "description": "The type of exception that was thrown"
8 | },
9 | "exception.message": {
10 | "type": "string",
11 | "description": "Detailed message describing the exception"
12 | },
13 | "exception.escaped": {
14 | "type": "boolean",
15 | "description": "Indicator if the exception has escaped the span's scope"
16 | },
17 | "exception.stacktrace": {
18 | "type": "string",
19 | "description": "The stack trace of the exception"
20 | }
21 | },
22 | "required": [
23 | "exception.type",
24 | "exception.message",
25 | "exception.escaped",
26 | "exception.stacktrace"
27 | ]
28 | }
--------------------------------------------------------------------------------
/jaiqu/tests/llms/llms.schema.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "http://json-schema.org/draft-07/schema#",
3 | "type": "object",
4 | "properties": {
5 | "id": {
6 | "type": [
7 | "string",
8 | "null"
9 | ],
10 | "description": "A unique identifier for the record."
11 | },
12 | "thread_id": {
13 | "type": "string",
14 | "format": "uuid",
15 | "description": "A UUID representing the thread."
16 | },
17 | "output": {
18 | "type": "string",
19 | "description": "The output value of an operation"
20 | },
21 | "input": {
22 | "type": "string",
23 | "description": "The input value to an operation"
24 | },
25 | "invocation_parameters": {
26 | "type": "string",
27 | "description": "Parameters used during the invocation of an LLM or API"
28 | },
29 | "prompt": {
30 | "type": "array",
31 | "items": {
32 | "type": "object",
33 | "properties": {
34 | "message.role": {
35 | "type": "string"
36 | },
37 | "message.content": {
38 | "type": "string"
39 | }
40 | },
41 | "required": [
42 | "message.role",
43 | "message.content"
44 | ]
45 | },
46 | "description": "List of messages sent to the LLM in a chat API request"
47 | },
48 | "completion": {
49 | "type": "array",
50 | "items": {
51 | "type": "object",
52 | "properties": {
53 | "message.role": {
54 | "type": "string"
55 | },
56 | "message.content": {
57 | "type": "string"
58 | }
59 | },
60 | "required": [
61 | "message.role",
62 | "message.content"
63 | ]
64 | },
65 | "description": "List of messages received from the LLM in a chat API request"
66 | },
67 | "prompt_tokens": {
68 | "type": "integer",
69 | "description": "The number of tokens in the prompt"
70 | },
71 | "completion_tokens": {
72 | "type": "integer",
73 | "description": "The number of tokens in the completion"
74 | },
75 | "total_tokens": {
76 | "type": "integer",
77 | "description": "Total number of tokens, including prompt and completion"
78 | },
79 | "tool_calls": {
80 | "type": "array",
81 | "items": {
82 | "type": "object",
83 | "properties": {
84 | "tool_call.function.name": {
85 | "type": "string"
86 | }
87 | },
88 | "required": [
89 | "tool_call.function.name"
90 | ]
91 | },
92 | "description": "List of tool calls (e.g. function calls) generated by the LLM"
93 | },
94 | "model_name": {
95 | "type": "string",
96 | "description": "The name of the language model being utilized"
97 | },
98 | "tool.name": {
99 | "type": "string",
100 | "description": "The name of the tool being utilized"
101 | },
102 | "tool.description": {
103 | "type": "string",
104 | "description": "Description of the tool's purpose and functionality"
105 | },
106 | "tool.parameters": {
107 | "type": "string",
108 | "description": "The parameters definition for invoking the tool"
109 | },
110 | "metadata": {
111 | "type": "string",
112 | "description": "Metadata associated with a span"
113 | },
114 | "init_timestamp": {
115 | "type": "string",
116 | "format": "date-time",
117 | "description": "The initial timestamp when the operation was started"
118 | },
119 | "end_timestamp": {
120 | "type": "string",
121 | "format": "date-time",
122 | "description": "The ending timestamp when the operation was completed"
123 | }
124 | },
125 | "required": [
126 | "output",
127 | "input",
128 | "invocation_parameters",
129 | "prompt",
130 | "completion",
131 | "model_name",
132 | "init_timestamp",
133 | "end_timestamp"
134 | ]
135 | }
--------------------------------------------------------------------------------
/jaiqu/tests/llms/openai/schema.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "http://json-schema.org/draft-07/schema#",
3 | "type": "object",
4 | "properties": {
5 | "id": {
6 | "type": ["string", "null"],
7 | "description": "A unique identifier for the record."
8 | },
9 | "thread_id": {
10 | "type": "string",
11 | "format": "uuid",
12 | "description": "A UUID representing the thread."
13 | },
14 | "completion": {
15 | "type": ["array", "string"],
16 | "description": "A JSONB object representing the completion."
17 | },
18 | "model": {
19 | "type": "string",
20 | "description": "A text field representing the model used."
21 | },
22 | "prompt_tokens": {
23 | "type": "number",
24 | "description": "A numeric field representing the number of tokens in the prompt."
25 | },
26 | "completion_tokens": {
27 | "type": "number",
28 | "description": "A numeric field representing the number of tokens in the completion."
29 | },
30 | "init_timestamp": {
31 | "type": "string",
32 | "format": "date-time",
33 | "description": "A timestamp with time zone indicating the initialization time."
34 | }
35 | },
36 | "required": [
37 | "id",
38 | "completion",
39 | "model"
40 | ]
41 | }
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["setuptools>=61.0", "wheel"]
3 | build-backend = "setuptools.build_meta"
4 |
5 | [project]
6 | name = "jaiqu"
7 | version = "0.0.6"
8 | authors = [
9 | { name = "Alex Reibman", email = "areibman@gmail.com" },
10 | { name = "Howard Gil", email = "howardbgil@gmail.com" },
11 | { name = "Braelyn Boynton", email = "bboynton97@gmail.com" }
12 | ]
13 | description = "AI utility to extract data from any JSON and reformat it into a new JSON with repeatable queries."
14 | readme = "README.md"
15 | requires-python = ">=3.7"
16 | classifiers = [
17 | "Programming Language :: Python :: 3",
18 | "License :: OSI Approved :: MIT License",
19 | "Operating System :: OS Independent",
20 | ]
21 | dependencies = [
22 | "jq==1.6.0",
23 | "openai>=1.63.2",
24 | "jsonschema==4.21.1",
25 | "typer==0.9.0",
26 | ]
27 |
28 | [project.optional-dependencies]
29 | dev = [
30 | "pytest>=7.4.4",
31 | "flake8>=3.1.0",
32 | "coverage[toml]>=7.4.0",
33 | ]
34 |
35 | [project.urls]
36 | Homepage = "https://github.com/AgentOps-AI/Jaiqu"
37 | Issues = "https://github.com/AgentOps-AI/Jaiqu/issues"
38 |
# Console commands belong in [project.scripts]; the pyproject metadata spec reserves
# the "console_scripts" entry-point group for this table.
[project.scripts]
jaiqu = "jaiqu.cli:main"
41 |
42 | [tool.setuptools]
43 | packages = { find = { where = ["."], exclude = ["samples"] } }
44 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | jq==1.6.0
2 | openai>=1.12.0,<2.0.0
3 | jsonschema==4.21.1
4 | typer==0.9.0
--------------------------------------------------------------------------------
/samples/data.json:
--------------------------------------------------------------------------------
1 | {
2 | "call.id": "123",
3 | "datetime": "2022-01-01",
4 | "timestamp": 1640995200,
5 | "Address": "123 Main St",
6 | "user": {
7 | "name": "John Doe",
8 | "age": 30,
9 | "contact": "john@email.com"
10 | }
11 | }
12 |
--------------------------------------------------------------------------------
/samples/schema.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "http://json-schema.org/draft-07/schema#",
3 | "type": "object",
4 | "properties": {
5 | "id": {
6 | "type": ["string", "null"],
7 | "description": "A unique identifier for the record."
8 | },
9 | "date": {
10 | "type": "string",
11 | "description": "A string describing the date."
12 | },
13 | "model": {
14 | "type": "string",
15 | "description": "A text field representing the model used."
16 | }
17 | },
18 | "required": [
19 | "id",
20 | "date"
21 | ]
22 | }
23 |
--------------------------------------------------------------------------------
/streamlit_app.py:
--------------------------------------------------------------------------------
import streamlit as st
import json
import jq
import os
from jaiqu import validate_schema, translate_schema


def _parse_json_or_stop(raw_text, label):
    """Parse `raw_text` as JSON; on failure show an error on the page and halt this script run."""
    try:
        return json.loads(raw_text)
    except json.JSONDecodeError as exc:
        st.error(f"{label} is not valid JSON: {exc}")
        st.stop()


# Set page layout to wide
st.set_page_config(layout="wide", page_title="Jaiqu: AI JSON Schema to JQ Query Generator")

# Custom styles for Streamlit elements
# NOTE(review): the style rules that belonged inside this string are missing from this
# copy of the file; restore them from version control if custom styling is needed.
st.markdown(
    """

    """,
    unsafe_allow_html=True
)

# Title of the app
# NOTE(review): the original HTML wrapper (custom color / horizontal padding) was lost in
# this copy; a plain <h1> is used so the literal is valid again. Confirm the intended styling.
st.markdown("<h1>Jaiqu: AI Schema to JQ Query Generator</h1>",
            unsafe_allow_html=True)

st.header('Desired data format')
col1, col2 = st.columns(2)

with col1:
    schema_json = st.text_area('Enter the desired JSON schema', value=json.dumps({
        "$schema": "http://json-schema.org/draft-07/schema#",
        "type": "object",
        "properties": {
            "id": {
                "type": ["string", "null"],
                "description": "A unique identifier for the record."
            },
            "date": {
                "type": "string",
                "description": "A string describing the date."
            },
            "model": {
                "type": "string",
                "description": "A text field representing the model used."
            }
        },
        "required": [
            "id",
            "date"
        ]
    }, indent=2), height=200)

with col2:
    input_json_str = st.text_area('Enter the input JSON', value=json.dumps({
        "call.id": "123",
        "datetime": "2022-01-01",
        "timestamp": 1640995200,
        "Address": "123 Main St",
        "user": {
            "name": "John Doe",
            "age": 30,
            "contact": "john@email.com"
        }
    }, indent=2), height=200)

# Both text areas are free-form user input, so parse defensively instead of letting a
# typo surface as an unhandled exception.
with col1:
    schema = _parse_json_or_stop(schema_json, "The JSON schema")
    st.json(schema, expanded=False)

with col2:
    input_json = _parse_json_or_stop(input_json_str, "The input JSON")
    st.json(input_json, expanded=False)

# Horizontal rule between the data section and the optional inputs
# NOTE(review): the original tag attributes (horizontal margin) were lost in this copy.
st.markdown("<hr>",
            unsafe_allow_html=True)

st.header('Optional Inputs')
opt_col1, opt_col2 = st.columns(2)

with opt_col1:
    key_hints = st.text_area('Enter any hints for key mapping',
                             value="We are processing outputs of an containing an id and a date of a user.", height=100)

with opt_col2:
    max_retries = st.number_input('Set maximum retries for translation', min_value=1,
                                  value=20, format="%d")
    openai_api_key = st.text_input('Enter your OpenAI API key', type="password")

# Validate schema
if st.button('Validate Schema', key="validate_schema"):

    if not openai_api_key:
        st.error("Please provide your OpenAI API key to validate the schema.")
        st.stop()

    with st.spinner('Validating schema...'):
        schema_properties, valid = validate_schema(input_json, schema,
                                                   openai_api_key=openai_api_key, key_hints=key_hints)
        st.write('Schema is valid:', valid)
        st.json(schema_properties, expanded=False)

# Translate schema
if st.button('Translate Schema', key="translate_schema"):

    # Check the key before showing the spinner so no "Translating..." state is displayed
    # for a request that cannot start.
    if not openai_api_key:
        st.error("Please provide your OpenAI API key to translate the schema.")
        st.stop()

    with st.spinner('Translating schema...'):
        try:
            jq_query = translate_schema(input_json, schema,
                                        openai_api_key=openai_api_key,
                                        key_hints=key_hints, max_retries=int(max_retries))
        except RuntimeError as e:
            # translate_schema reports unsatisfiable schemas and exhausted retries as RuntimeError.
            st.error(f"Error: {e}")
            st.stop()
        st.text('Finalized jq query')
        st.code(jq_query, language="jq")

    with st.spinner('Checking the jq query results...'):
        # Check the jq query results
        st.text('JQ query results')
        try:
            result = jq.compile(jq_query).input(input_json).all()[0]
            st.write(result)
        except Exception as e:
            st.error(f"Error: {e}")
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [tox]
2 | envlist = py3, flake8
3 | isolated_build = true
4 |
5 | [testenv]
6 | deps =
7 | pytest
8 | coverage
9 |
10 | [testenv:flake8]
11 | deps = flake8
12 | commands = flake8 jaiqu/
13 |
[flake8]
max-line-length = 120
# The package __init__ re-exports names on purpose, so unused-import (F401) is ignored there.
per-file-ignores =
    jaiqu/__init__.py: F401
--------------------------------------------------------------------------------