├── tests
│   └── __init__.py
├── .gitignore
├── docugami_kg_rag
│   ├── __init__.py
│   ├── config
│   │   ├── huggingface.py
│   │   ├── fireworksai_llama3.py
│   │   ├── fireworksai_mixtral.py
│   │   ├── openai.py
│   │   ├── redis.py
│   │   ├── chromadb.py
│   │   └── __init__.py
│   ├── state_models.py
│   ├── green_examples
│   │   ├── answer_examples.yaml
│   │   ├── standalone_question_examples.yaml
│   │   ├── summarize_chunk_examples.yaml
│   │   ├── describe_document_set_examples.yaml
│   │   ├── sql_fixup_examples.yaml
│   │   ├── summarize_document_examples.yaml
│   │   └── sql_examples.yaml
│   ├── agent.py
│   └── indexing.py
├── .vscode
│   └── launch.json
├── .github
│   ├── ISSUE_TEMPLATE
│   │   ├── feature_request.md
│   │   └── bug_report.md
│   ├── PULL_REQUEST_TEMPLATE.md
│   ├── workflows
│   │   └── ci.yml
│   └── CONTRIBUTING.md
├── .devcontainer
│   └── devcontainer.json
├── LICENSE
├── index.py
├── Makefile
├── pyproject.toml
├── notebooks
│   ├── run-csv.ipynb
│   └── eval-sec-10-q.ipynb
└── README.md

/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | .mypy_cache
3 | .venv
4 | .env
5 | temp
6 | dist
--------------------------------------------------------------------------------
/docugami_kg_rag/__init__.py:
--------------------------------------------------------------------------------
1 | from docugami_kg_rag.agent import agent
2 | 
3 | __all__ = ["agent"]
4 | 
--------------------------------------------------------------------------------
/docugami_kg_rag/config/huggingface.py:
--------------------------------------------------------------------------------
1 | # Reference: https://huggingface.co/models
2 | import torch
3 | from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings
4 | 
5 | device = "cpu"
6 | if torch.cuda.is_available():
7 |     device = "cuda"
8 | 
9 | EMBEDDINGS = HuggingFaceEmbeddings(
10 |     model_name="sentence-transformers/all-mpnet-base-v2",
11 |     model_kwargs={"device": device},
12 | )
13 | 
14 | 
--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
1 | {
2 |     "version": "0.2.0",
3 |     "configurations": [
4 |         {
5 |             "name": "Python Debugger: Attach using Process Id",
6 |             "type": "debugpy",
7 |             "request": "attach",
8 |             "processId": "${command:pickProcess}",
9 |             "justMyCode": false
10 |         },
11 |         {
12 |             "name": "Python Debugger: Python File",
13 |             "type": "debugpy",
14 |             "request": "launch",
15 |             "program": "${file}",
16 |             "justMyCode": false
17 |         }
18 |     ]
19 | }
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 | 
8 | ---
9 | 
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 | 
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 | 
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 22] 30 | 31 | **Additional context** 32 | Add any other context about the problem here. 33 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | 9 | steps: 10 | - name: Check out the code 11 | uses: actions/checkout@v3 12 | 13 | - name: Install Poetry 14 | run: | 15 | curl -sSL https://install.python-poetry.org | python3 - 16 | shell: bash 17 | 18 | - name: Install dependencies 19 | run: poetry install --all-extras 20 | 21 | - name: Install mypy 22 | run: poetry run pip install mypy 23 | 24 | - name: Lint code 25 | run: make lint 26 | 27 | - name: Check spellings 28 | run: make spell_check 29 | 30 | - name: Check PR status 31 | run: | 32 | if [ -n "$(git diff --name-only ${{ github.base_ref }}..${{ github.head_ref }})" ]; then 33 | echo "Changes detected. Please make sure to push all changes to the branch before merging."; 34 | exit 1; 35 | fi 36 | -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | // For format details, see https://aka.ms/devcontainer.json. For config options, see the 2 | // README at: https://github.com/devcontainers/templates/tree/main/src/ubuntu 3 | { 4 | "name": "Ubuntu", 5 | // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile 6 | "image": "mcr.microsoft.com/devcontainers/base:jammy", 7 | "features": { 8 | "ghcr.io/devcontainers/features/python:1": {}, 9 | "ghcr.io/devcontainers-contrib/features/poetry:2": {} 10 | } 11 | 12 | // Features to add to the dev container. More info: https://containers.dev/features. 13 | // "features": {}, 14 | 15 | // Use 'forwardPorts' to make a list of ports inside the container available locally. 16 | // "forwardPorts": [], 17 | 18 | // Use 'postCreateCommand' to run commands after the container is created. 19 | // "postCreateCommand": "uname -a", 20 | 21 | // Configure tool-specific properties. 22 | // "customizations": {}, 23 | 24 | // Uncomment to connect as root instead. 
More info: https://aka.ms/dev-containers-non-root. 25 | // "remoteUser": "root" 26 | } 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 LangChain, Inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /docugami_kg_rag/state_models.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from pathlib import Path 3 | from typing import List 4 | 5 | from langchain.storage.in_memory import InMemoryStore 6 | 7 | 8 | @dataclass 9 | class ReportDetails: 10 | id: str 11 | """ID of report.""" 12 | 13 | name: str 14 | """Name of report.""" 15 | 16 | local_xlsx_path: Path 17 | """Local path to XLSX of the report.""" 18 | 19 | retrieval_tool_function_name: str 20 | """Function name for retrieval tool e.g. sql_query_earnings_calls.""" 21 | 22 | retrieval_tool_description: str 23 | """ 24 | Description of retrieval tool e.g. Runs a SQL query over the REPORT_NAME report, 25 | represented as the following SQL Table... etc.""" 26 | 27 | 28 | @dataclass 29 | class LocalIndexState: 30 | full_doc_summaries_by_id: InMemoryStore 31 | """Mapping of ID to full document summaries.""" 32 | 33 | chunks_by_id: InMemoryStore 34 | """Mapping of ID to chunks.""" 35 | 36 | retrieval_tool_function_name: str 37 | """Function name for retrieval tool e.g. "search_earnings_calls.""" 38 | 39 | retrieval_tool_description: str 40 | """Description of retrieval tool e.g. 
Searches for and returns chunks from earnings call documents.""" 41 | 42 | reports: List[ReportDetails] = field(default_factory=list) 43 | """Details about any reports for this docset.""" 44 | -------------------------------------------------------------------------------- /docugami_kg_rag/config/fireworksai_llama3.py: -------------------------------------------------------------------------------- 1 | # Reference: https://fireworks.ai/models 2 | import os 3 | 4 | from langchain_fireworks.chat_models import ChatFireworks 5 | 6 | if "FIREWORKS_API_KEY" not in os.environ: 7 | raise Exception("FIREWORKS_API_KEY environment variable not set") 8 | LARGE_CONTEXT_INSTRUCT_LLM = ChatFireworks( 9 | model="accounts/fireworks/models/llama-v3-70b-instruct", # input context limit is 8k tokens 10 | temperature=0, 11 | max_tokens=8 * 1024, # this sets the total token max (input and output) 12 | model_kwargs={ 13 | "context_length_exceeded_behavior": "truncate", 14 | }, 15 | cache=True, 16 | ) 17 | SMALL_CONTEXT_INSTRUCT_LLM = LARGE_CONTEXT_INSTRUCT_LLM # Use the same model for large and small context tasks 18 | SQL_GEN_LLM = LARGE_CONTEXT_INSTRUCT_LLM # Use the same model for sql gen 19 | LLM_BATCH_SIZE = 1 20 | 21 | # Lengths for the Docugami loader are in terms of characters, 1 token ~= 4 chars in English 22 | # Reference: https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them 23 | MIN_LENGTH_TO_SUMMARIZE = 2048 # chunks and docs below this length are embedded as-is 24 | MAX_FULL_DOCUMENT_TEXT_LENGTH = int(1024 * 4 * 4.5) # ~4.5k tokens 25 | MAX_CHUNK_TEXT_LENGTH = int(1024 * 4 * 1) # ~1k tokens 26 | MIN_CHUNK_TEXT_LENGTH = int(1024 * 4 * 0.5) # ~0.5k tokens 27 | SUB_CHUNK_TABLES = False 28 | INCLUDE_XML_TAGS = False 29 | PARENT_HIERARCHY_LEVELS = 2 30 | RETRIEVER_K = 8 31 | -------------------------------------------------------------------------------- /docugami_kg_rag/config/fireworksai_mixtral.py: -------------------------------------------------------------------------------- 1 | # Reference: https://fireworks.ai/models 2 | import os 3 | 4 | from langchain_fireworks.chat_models import ChatFireworks 5 | 6 | if "FIREWORKS_API_KEY" not in os.environ: 7 | raise Exception("FIREWORKS_API_KEY environment variable not set") 8 | LARGE_CONTEXT_INSTRUCT_LLM = ChatFireworks( 9 | model="accounts/fireworks/models/mixtral-8x7b-instruct", # input context limit is 32k tokens 10 | temperature=0, 11 | max_tokens=32 * 1024, # this sets the total token max (input and output) 12 | model_kwargs={ 13 | "context_length_exceeded_behavior": "truncate", 14 | }, 15 | cache=True, 16 | ) 17 | SMALL_CONTEXT_INSTRUCT_LLM = LARGE_CONTEXT_INSTRUCT_LLM # Use the same model for large and small context tasks 18 | SQL_GEN_LLM = LARGE_CONTEXT_INSTRUCT_LLM # Use the same model for sql gen 19 | LLM_BATCH_SIZE = 32 20 | 21 | # Lengths for the Docugami loader are in terms of characters, 1 token ~= 4 chars in English 22 | # Reference: https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them 23 | MIN_LENGTH_TO_SUMMARIZE = 2048 # chunks and docs below this length are embedded as-is 24 | MAX_FULL_DOCUMENT_TEXT_LENGTH = int(1024 * 4 * 14) # ~14k tokens 25 | MAX_CHUNK_TEXT_LENGTH = int(1024 * 4 * 4.5) # ~4.5k tokens 26 | MIN_CHUNK_TEXT_LENGTH = int(1024 * 4 * 1.5) # ~1.5k tokens 27 | SUB_CHUNK_TABLES = False 28 | INCLUDE_XML_TAGS = False 29 | PARENT_HIERARCHY_LEVELS = 2 30 | RETRIEVER_K = 8 31 | -------------------------------------------------------------------------------- 
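Note on how these config modules fit together: fireworksai_llama3.py and fireworksai_mixtral.py above and openai.py below each define the LLM and chunking constants that the rest of the code imports from docugami_kg_rag.config, while huggingface.py supplies only local EMBEDDINGS. Exactly one LLM config plus one vector store config is star-imported in docugami_kg_rag/config/__init__.py (shown later in this listing); the others stay commented out. A minimal sketch of switching the active backend to Fireworks Mixtral, assuming the module layout in this repository — an illustrative variant, not the checked-in __init__.py, which enables chromadb and openai:

```python
# Hypothetical variant of docugami_kg_rag/config/__init__.py (illustration only).
from docugami_kg_rag.config.chromadb import *  # vector store helpers (Chroma)
# from docugami_kg_rag.config.redis import *   # ...or Redis instead

# from docugami_kg_rag.config.openai import *              # OpenAI LLMs + embeddings
from docugami_kg_rag.config.fireworksai_mixtral import *   # LLMs only; needs FIREWORKS_API_KEY
from docugami_kg_rag.config.huggingface import *           # supplies EMBEDDINGS, which the
                                                            # Fireworks config does not define
```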
/docugami_kg_rag/config/openai.py: -------------------------------------------------------------------------------- 1 | # OpenAI models and Embeddings 2 | # Reference: https://platform.openai.com/docs/models 3 | import os 4 | 5 | from langchain_openai import ChatOpenAI, OpenAIEmbeddings 6 | 7 | if "OPENAI_API_KEY" not in os.environ: 8 | raise Exception("OPENAI_API_KEY environment variable not set") 9 | 10 | LARGE_CONTEXT_INSTRUCT_LLM = ChatOpenAI( 11 | temperature=0, 12 | model="gpt-4-turbo-preview", # input context limit is 128k tokens 13 | cache=True, 14 | max_tokens=2 * 1024, # only output tokens 15 | ) 16 | SMALL_CONTEXT_INSTRUCT_LLM = ChatOpenAI( 17 | temperature=0, 18 | model="gpt-3.5-turbo-1106", # input context limit is 16k tokens 19 | cache=True, 20 | max_tokens=2 * 1024, # only output tokens 21 | ) 22 | SQL_GEN_LLM = SMALL_CONTEXT_INSTRUCT_LLM # Use the same model for sql gen 23 | LLM_BATCH_SIZE = 256 24 | 25 | EMBEDDINGS = OpenAIEmbeddings(model="text-embedding-ada-002") 26 | 27 | # Lengths for the Docugami loader are in terms of characters, 1 token ~= 4 chars in English 28 | # Reference: https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them 29 | MIN_LENGTH_TO_SUMMARIZE = 2048 # chunks and docs below this length are embedded as-is 30 | MAX_FULL_DOCUMENT_TEXT_LENGTH = int(1024 * 4 * 14) # ~14k tokens 31 | MAX_CHUNK_TEXT_LENGTH = int(1024 * 4 * 6.5) # ~6.5k tokens 32 | MIN_CHUNK_TEXT_LENGTH = int(1024 * 4 * 1.5) # ~1.5k tokens 33 | SUB_CHUNK_TABLES = True 34 | INCLUDE_XML_TAGS = True 35 | PARENT_HIERARCHY_LEVELS = 2 36 | RETRIEVER_K = 8 37 | 38 | -------------------------------------------------------------------------------- /index.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import typer 4 | from docugami import Docugami 5 | 6 | from docugami_kg_rag.indexing import index_docset 7 | 8 | docugami_client = Docugami() 9 | 10 | app = typer.Typer() 11 | 12 | 13 | @app.command() 14 | def main() -> None: 15 | docsets_response = docugami_client.docsets.list() 16 | 17 | if not docsets_response or not docsets_response.docsets: 18 | raise Exception("The workspace corresponding to the provided DOCUGAMI_API_KEY does not have any docsets.") 19 | 20 | docsets = docsets_response.docsets 21 | 22 | typer.echo("Your workspace contains the following Docsets:\n") 23 | for idx, docset in enumerate(docsets, start=1): 24 | print(f"{idx}: {docset.name} (ID: {docset.id})") 25 | user_input = typer.prompt( 26 | "\nPlease enter the number(s) of the docset(s) to index (comma-separated) or 'all' to index all docsets" 27 | ) 28 | 29 | if user_input.lower() == "all": 30 | selected_docsets = [d for d in docsets] 31 | else: 32 | selected_indices = [int(i.strip()) for i in user_input.split(",")] 33 | selected_docsets = [docsets[idx - 1] for idx in selected_indices if 0 < idx <= len(docsets)] 34 | 35 | for docset in [d for d in selected_docsets if d is not None]: 36 | if not docset.id or not docset.name: 37 | raise Exception(f"Docset must have ID as well as Name: {docset}") 38 | 39 | index_docset(docset.id, docset.name) 40 | 41 | 42 | if __name__ == "__main__": 43 | if sys.gettrace(): 44 | # This code will only run if a debugger is attached 45 | index_docset(docset_id="s85dxu9aie2h", name="SEC 10Q Reports") 46 | else: 47 | app() 48 | -------------------------------------------------------------------------------- /docugami_kg_rag/config/redis.py: -------------------------------------------------------------------------------- 
1 | # Reference: https://python.langchain.com/docs/integrations/vectorstores/redis 2 | from typing import List, Optional 3 | 4 | from docugami_langchain.retrievers.fused_summary import ( 5 | FILE_ID_KEY, 6 | PARENT_CHUNK_ID_KEY, 7 | SOURCE_KEY, 8 | ) 9 | from langchain_community.vectorstores.redis.base import Redis, check_index_exists 10 | from langchain_core.documents import Document 11 | from langchain_core.embeddings import Embeddings 12 | from langchain_core.vectorstores import VectorStore 13 | 14 | REDIS_URL = "redis://localhost:6379" 15 | REDIS_INDEX_SCHEMA = { 16 | "text": [ 17 | {"name": "id"}, 18 | {"name": PARENT_CHUNK_ID_KEY}, 19 | {"name": FILE_ID_KEY}, 20 | {"name": SOURCE_KEY}, 21 | ], 22 | } 23 | 24 | 25 | def vector_store_index_exists(docset_id: str, embeddings: Embeddings) -> bool: 26 | conn = Redis(redis_url=REDIS_URL, index_name=docset_id, embedding=embeddings) 27 | return check_index_exists(conn.client, docset_id) 28 | 29 | 30 | def get_vector_store_index(docset_id: str, embeddings: Embeddings) -> Optional[VectorStore]: 31 | if vector_store_index_exists(docset_id, embeddings): 32 | return Redis.from_existing_index( 33 | embedding=embeddings, index_name=docset_id, schema=REDIS_INDEX_SCHEMA, redis_url=REDIS_URL # type: ignore 34 | ) 35 | else: 36 | return None 37 | 38 | 39 | def init_vector_store_index(docset_id: str, docs: List[Document], embeddings: Embeddings, force: bool = True) -> VectorStore: 40 | if force and vector_store_index_exists(docset_id, embeddings): 41 | del_vector_store_index(docset_id) 42 | 43 | return Redis.from_documents(docs, index_name=docset_id, embedding=embeddings, redis_url=REDIS_URL) 44 | 45 | 46 | def del_vector_store_index(docset_id: str) -> None: 47 | Redis.drop_index(docset_id, True, redis_url=REDIS_URL) 48 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all format lint test tests integration_tests docker_tests help extended_tests 2 | 3 | # Default target executed when no arguments are given to make. 4 | all: help 5 | 6 | # Define a variable for the test file path. 7 | TEST_FILE ?= tests/ docugami_kg_rag/ 8 | 9 | test: 10 | poetry run pytest --doctest-modules $(TEST_FILE) 11 | 12 | tests: 13 | poetry run pytest --doctest-modules $(TEST_FILE) 14 | 15 | ###################### 16 | # LINTING AND FORMATTING 17 | ###################### 18 | 19 | # Define a variable for Python and notebook files. 20 | PYTHON_FILES=. 21 | MYPY_CACHE=.mypy_cache 22 | lint format: PYTHON_FILES=. 23 | lint_package: PYTHON_FILES=docugami_kg_rag 24 | lint_tests: PYTHON_FILES=tests 25 | lint_tests: MYPY_CACHE=.mypy_cache_test 26 | 27 | lint lint_diff lint_package lint_tests: 28 | poetry run ruff check . 
29 | poetry run ruff check $(PYTHON_FILES) --diff 30 | poetry run ruff check --select I $(PYTHON_FILES) 31 | mkdir -p $(MYPY_CACHE); poetry run mypy $(PYTHON_FILES) --cache-dir $(MYPY_CACHE) 32 | 33 | format format_diff: 34 | poetry run ruff check --select I --fix $(PYTHON_FILES) 35 | 36 | spell_check: 37 | poetry run codespell --skip "./tests/testdata/*,*.yaml,*.lock,*.pdf,*.csv" --toml pyproject.toml 38 | 39 | spell_fix: 40 | poetry run codespell --skip "./tests/testdata/*" --toml pyproject.toml -w 41 | 42 | check_imports: $(shell find docugami_kg_rag -name '*.py') 43 | poetry run python ./scripts/check_imports.py $^ 44 | 45 | ###################### 46 | # HELP 47 | ###################### 48 | 49 | help: 50 | @echo '----' 51 | @echo 'check_imports - check imports' 52 | @echo 'format - run code formatters' 53 | @echo 'lint - run linters' 54 | @echo 'spell_check - run spell checker' 55 | @echo 'test - run unit tests' 56 | @echo 'tests - run unit tests' 57 | @echo 'test TEST_FILE= - run all tests in file' 58 | -------------------------------------------------------------------------------- /docugami_kg_rag/config/chromadb.py: -------------------------------------------------------------------------------- 1 | # ChromaDB 2 | # Reference: https://python.langchain.com/docs/integrations/vectorstores/chroma 3 | from pathlib import Path 4 | from typing import List, Optional 5 | 6 | import chromadb 7 | from langchain_community.vectorstores.chroma import Chroma 8 | from langchain_core.documents import Document 9 | from langchain_core.embeddings import Embeddings 10 | from langchain_core.vectorstores import VectorStore 11 | 12 | CHROMA_DIRECTORY = Path("/tmp/docugami/chroma_db") 13 | CHROMA_DIRECTORY.mkdir(parents=True, exist_ok=True) 14 | 15 | 16 | def vector_store_index_exists(docset_id: str, embeddings: Embeddings) -> bool: 17 | persistent_client = chromadb.PersistentClient(path=str(CHROMA_DIRECTORY.absolute())) 18 | collections = persistent_client.list_collections() 19 | for c in collections: 20 | if c.name == docset_id: 21 | return True 22 | 23 | return False 24 | 25 | 26 | def get_vector_store_index(docset_id: str, embeddings: Embeddings) -> Optional[VectorStore]: 27 | if vector_store_index_exists(docset_id, embeddings): 28 | return Chroma( 29 | collection_name=docset_id, 30 | persist_directory=str(CHROMA_DIRECTORY.absolute()), 31 | embedding_function=embeddings, 32 | ) 33 | 34 | return None 35 | 36 | 37 | def init_vector_store_index(docset_id: str, docs: List[Document], embeddings: Embeddings, force: bool = True) -> VectorStore: 38 | if force and vector_store_index_exists(docset_id, embeddings): 39 | del_vector_store_index(docset_id) 40 | 41 | return Chroma.from_documents( 42 | documents=docs, 43 | collection_name=docset_id, 44 | embedding=embeddings, 45 | persist_directory=str(CHROMA_DIRECTORY.absolute()), 46 | ) 47 | 48 | 49 | def del_vector_store_index(docset_id: str) -> None: 50 | persistent_client = chromadb.PersistentClient(path=str(CHROMA_DIRECTORY.absolute())) 51 | persistent_client.delete_collection(docset_id) 52 | -------------------------------------------------------------------------------- /docugami_kg_rag/green_examples/answer_examples.yaml: -------------------------------------------------------------------------------- 1 | - question: What is the capital of france? 2 | chat_history: 3 | answer: Paris 4 | - question: What star is closest to earth? 5 | chat_history: 6 | answer: The sun is the closest star to the Earth. 
However, if you mean which star other than the Sun is the closest, that is Proxima Centauri. 7 | - question: Who wrote the book that introduced these characters? 8 | chat_history: | 9 | Human: Can you name a famous wizard from literature? 10 | AI: One of the most famous wizards from literature is Harry Potter. 11 | Human: And who is his main enemy? 12 | AI: Harry Potter's main enemy is Lord Voldemort. 13 | answer: J.K. Rowling wrote the Harry Potter series, which introduced these characters. 14 | - question: What's the combined height of these mountains? 15 | chat_history: | 16 | Human: What is the tallest mountain in the world? 17 | AI: Mount Everest is the tallest mountain in the world. 18 | Human: What about the second tallest? 19 | AI: The second tallest is K2. 20 | answer: Mount Everest is approximately 8,848 meters tall, and K2 is about 8,611 meters tall. The combined height is approximately 17,459 meters. 21 | - question: Which city are they planning to visit next? 22 | chat_history: | 23 | Human: What is the capital of Italy? 24 | AI: The capital of Italy is Rome. 25 | Human: Which city is famous for the Leaning Tower? 26 | AI: The city famous for the Leaning Tower is Pisa. 27 | answer: Based on the interest in famous landmarks, they are planning to visit Pisa next. 28 | - question: Who's considered more influential in their field? 29 | chat_history: | 30 | Human: Name a famous physicist. 31 | AI: Albert Einstein is a famous physicist known for the theory of relativity. 32 | Human: Who is a renowned biologist? 33 | AI: Charles Darwin is renowned for his work on the theory of evolution. 34 | answer: Both Albert Einstein and Charles Darwin are considered highly influential in their respective fields, but the influence is domain-specific. Einstein is more influential in physics, while Darwin is in biology. 
35 | -------------------------------------------------------------------------------- /docugami_kg_rag/config/__init__.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import os 3 | from pathlib import Path 4 | 5 | from gptcache import Cache 6 | from gptcache.manager.factory import manager_factory 7 | from gptcache.processor.pre import get_prompt 8 | from langchain_community.cache import GPTCache 9 | from langchain_core.globals import set_llm_cache 10 | 11 | from docugami_kg_rag.config.chromadb import * 12 | 13 | # from docugami_kg_rag.config.fireworksai_llama3 import * 14 | # from docugami_kg_rag.config.fireworksai_mixtral import * 15 | # from docugami_kg_rag.config.huggingface import * 16 | from docugami_kg_rag.config.openai import * 17 | 18 | # from docugami_kg_rag.config.redis import * 19 | 20 | DOCUGAMI_API_ENDPOINT = "https://api.docugami.com/v1preview1" 21 | DOCUGAMI_API_KEY = os.environ.get("DOCUGAMI_API_KEY") 22 | if not DOCUGAMI_API_KEY: 23 | raise Exception("Please set the DOCUGAMI_API_KEY environment variable") 24 | 25 | 26 | INDEXING_LOCAL_STATE_PATH = os.environ.get("INDEXING_LOCAL_STATE_PATH", "/tmp/docugami/indexing_local_state.pkl") 27 | os.makedirs(Path(INDEXING_LOCAL_STATE_PATH).parent, exist_ok=True) 28 | 29 | INDEXING_LOCAL_REPORT_DBS_ROOT = os.environ.get("INDEXING_LOCAL_REPORT_DBS_ROOT", "/tmp/docugami/report_dbs") 30 | os.makedirs(Path(INDEXING_LOCAL_REPORT_DBS_ROOT).parent, exist_ok=True) 31 | 32 | LOCAL_LLM_CACHE_DIR = os.environ.get("LOCAL_LLM_CACHE", "/tmp/docugami/langchain_cache") 33 | os.makedirs(Path(LOCAL_LLM_CACHE_DIR).parent, exist_ok=True) 34 | 35 | 36 | def get_hashed_name(name: str) -> str: 37 | return hashlib.sha256(name.encode()).hexdigest() 38 | 39 | 40 | def init_gptcache(cache_obj: Cache, llm: str) -> None: 41 | hashed_llm = get_hashed_name(llm) 42 | hashed_llm_dir = Path(LOCAL_LLM_CACHE_DIR) / hashed_llm 43 | cache_obj.init( 44 | pre_embedding_func=get_prompt, 45 | data_manager=manager_factory(manager="map", data_dir=str(hashed_llm_dir.absolute())), 46 | ) 47 | 48 | 49 | set_llm_cache(GPTCache(init_gptcache)) 50 | 51 | EXAMPLES_PATH = Path(__file__).parent.parent / "green_examples" 52 | 53 | DEFAULT_USE_REPORTS = True 54 | DEFAULT_USE_CONVERSATIONAL_TOOLS = True 55 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "docugami-kg-rag" 3 | version = "0.6.0" 4 | description = "This template contains a reference architecture for Retrieval Augmented Generation against a set of documents using Docugami's XML Knowledge Graph (KG-RAG)." 
5 | authors = [ 6 | "Docugami ", 7 | ] 8 | readme = "README.md" 9 | 10 | [tool.poetry.dependencies] 11 | python = ">=3.9,<4.0" 12 | langchain = ">=0.1.19" 13 | langchain-community = ">=0.2.3" 14 | docugami-langchain = "^0.0.16" 15 | # docugami-langchain = {git = "https://github.com/docugami/docugami-langchain.git"} 16 | # docugami-langchain = {git = "https://github.com/docugami/docugami-langchain.git", rev = "tjaffri/rel"} 17 | typer = ">=0.9.0" 18 | docugami = ">=0.1.2" 19 | pyarrow = ">=16.0.0" 20 | langsmith = ">=0.1.52" 21 | openpyxl = ">=3.1.2" 22 | faiss-cpu = ">=1.8.0" 23 | torch = "2.2.0" 24 | gptcache = "^0.1.43" 25 | langchain-openai = { version = "*", optional = true } 26 | langchain-fireworks = { version = "*", optional = true } 27 | chromadb = { version = ">=0.5.0", optional = true } 28 | redis = { version = ">=5.0.4", optional = true } 29 | redisvl = { version = ">=0.2.0", optional = true } 30 | sentence-transformers = { version = "*", optional = true } 31 | 32 | [tool.poetry.group.dev.dependencies] 33 | codespell = "*" 34 | langchain-cli = ">=0.0.22rc1" 35 | ipykernel = "*" 36 | black = "*" 37 | pytest = "*" 38 | pyyaml = "*" 39 | ruff = "*" 40 | 41 | [tool.poetry.extras] 42 | openai = ["langchain-openai", "tiktoken"] 43 | fireworks = ["langchain-fireworks"] 44 | chromadb = ["chromadb"] 45 | redis = ["redis", "redisvl"] 46 | huggingface = ["langchain-huggingface", "sentence-transformers"] 47 | 48 | [tool.black] 49 | line-length = 127 50 | 51 | [tool.ruff] 52 | lint.select = [ 53 | "E", # pycodestyle 54 | "F", # pyflakes 55 | "I", # isort 56 | ] 57 | line-length = 200 58 | lint.ignore = ["E501", "E402", "F403"] 59 | 60 | [tool.mypy] 61 | disallow_untyped_defs = true 62 | disable_error_code = ["typeddict-item"] 63 | exclude = "/temp/" 64 | 65 | [[tool.mypy.overrides]] 66 | module = [ 67 | "torch", "langchain_openai.*", "langchain_fireworks.*", "docugami_langchain.*", "gptcache.*", "llama_cpp.*" 68 | ] 69 | ignore_missing_imports = true 70 | 71 | [tool.langserve] 72 | export_module = "docugami_kg_rag" 73 | export_attr = "agent" 74 | 75 | [tool.templates-hub] 76 | use-case = "rag" 77 | author = "Docugami" 78 | integrations = ["OpenAI"] 79 | tags = ["vectordbs"] 80 | 81 | [tool.pytest.ini_options] 82 | addopts = "--doctest-modules" 83 | norecursedirs = ".venv temp" 84 | 85 | [build-system] 86 | requires = [ 87 | "poetry-core", 88 | ] 89 | build-backend = "poetry.core.masonry.api" 90 | -------------------------------------------------------------------------------- /docugami_kg_rag/green_examples/standalone_question_examples.yaml: -------------------------------------------------------------------------------- 1 | - chat_history: 2 | human: What is the capital of france? 3 | standalone_agent_input: What is the capital of france? 4 | - chat_history: 5 | human: What star is closest to earth? 6 | standalone_agent_input: What star is closest to earth? 7 | - chat_history: | 8 | Human: Can you name a famous wizard from literature? 9 | ---------------- 10 | AI: One of the most famous wizards from literature is Harry Potter. 11 | ---------------- 12 | Human: And who is his main enemy? 13 | ---------------- 14 | AI: Harry Potter's main enemy is Lord Voldemort. 15 | ---------------- 16 | human: Who wrote the book that introduced these characters? 17 | standalone_agent_input: Who is the author of the book that includes the characters Harry Potter and Lord Voldemort? 18 | - chat_history: | 19 | Human: What is the tallest mountain in the world? 
20 | ---------------- 21 | AI: Mount Everest is the tallest mountain in the world. 22 | ---------------- 23 | Human: What about the second tallest? 24 | ---------------- 25 | AI: The second tallest is K2. 26 | ---------------- 27 | human: What's the combined height of these mountains? 28 | standalone_agent_input: What is the combined height of Mount Everest and K2? 29 | - chat_history: | 30 | Human: What is the capital of Italy? 31 | ---------------- 32 | AI: The capital of Italy is Rome. 33 | ---------------- 34 | Human: Which city is famous for the Leaning Tower? 35 | ---------------- 36 | AI: The city famous for the Leaning Tower is Pisa. 37 | ---------------- 38 | human: What continent are these cities in? 39 | standalone_agent_input: What continent are Rome and Pisa in? 40 | - chat_history: | 41 | Human: Name a famous physicist. 42 | ---------------- 43 | AI: Albert Einstein is a famous physicist known for the theory of relativity. 44 | ---------------- 45 | Human: Who is a renowned biologist? 46 | ---------------- 47 | AI: Charles Darwin is renowned for his work on the theory of evolution. 48 | ---------------- 49 | human: Who's considered more influential in their field? 50 | standalone_agent_input: Between Albert Einstein and Charles Darwin, who is considered more influential in their field? 51 | - chat_history: | 52 | Human: What is the largest city in King County, WA? 53 | ---------------- 54 | AI: Seattle is the largest city in King County, Washington. 55 | ---------------- 56 | Human: Do you know who it was named after? 57 | ---------------- 58 | AI: Seattle is named after Chief Si'ahl (1780-1866), a leader of the Suquamish and Duwamish people. 59 | ---------------- 60 | human: List some major companies headquartered there 61 | standalone_agent_input: Which major companies are headquartered in the Seattle area? 62 | -------------------------------------------------------------------------------- /docugami_kg_rag/green_examples/summarize_chunk_examples.yaml: -------------------------------------------------------------------------------- 1 | - format: text 2 | contents: | 3 | This Agreement shall be governed by and construed in accordance with the laws of the State of [State], 4 | without regard to its conflict of laws principles. 5 | summary: Governing law and jurisdiction determined by the laws of [State]. 6 | 7 | - format: text 8 | contents: | 9 | Each party shall indemnify, defend and hold harmless the other party and its officers, directors, employees, 10 | agents, affiliates, successors and assigns from and against any and all claims, losses, damages, liabilities, 11 | penalties, judgments, costs and expenses (including reasonable attorneys' fees) arising out of or resulting 12 | from the indemnifying party's gross negligence or willful misconduct in connection with this Agreement. 13 | summary: Mutual indemnification for negligence or misconduct. 14 | 15 | - format: text 16 | contents: | 17 | Either party may terminate this Agreement upon thirty (30) days' written notice to the other party if the 18 | other party breaches any material term or condition of this Agreement and fails to cure such breach within 19 | such thirty (30) day period. 20 | summary: Termination for material breach with 30-day cure period. 
21 | 22 | - format: text 23 | contents: | 24 | The term of this Agreement shall commence on the Effective Date and continue in full force and effect for 25 | an initial term of one (1) year, and shall automatically renew for successive one (1) year terms, unless 26 | either party provides written notice of its intent not to renew at least thirty (30) days prior to the 27 | expiration of the then-current term. 28 | summary: Initial term of one year with automatic renewal, unless 30-day notice given. 29 | 30 | - format: text 31 | contents: | 32 | All notices, requests, demands, and other communications under this Agreement shall be in writing and 33 | shall be deemed to have been given on the date of service if served personally, or three (3) days after 34 | mailing if mailed, by first class mail, registered or certified, postage prepaid, and addressed to the 35 | parties at the addresses specified in this Agreement or at such other address as a party may designate 36 | by notice given as herein provided. 37 | summary: Written notices deemed given on date of service or three days after mailing. 38 | 39 | - format: text 40 | contents: | 41 | The parties acknowledge that the relationship between them is that of independent contractors. Nothing 42 | contained in this Agreement shall be construed as creating any agency, partnership, joint venture or 43 | other form of joint enterprise, employment or fiduciary relationship between the parties, and neither 44 | party shall have authority to contract for or bind the other party in any manner whatsoever. 45 | summary: Parties as independent contractors, not agents, employees, or partners. 46 | -------------------------------------------------------------------------------- /notebooks/run-csv.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# CSV Eval\n", 8 | "\n", 9 | "Runs the agent against a CSV with a single \"question\" column. 
Writes out another CSV with the \"answer\" column included.\n" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "!rm -rf temp\n", 19 | "!git clone https://github.com/docugami/KG-RAG-datasets.git temp" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "from datetime import datetime\n", 29 | "import os\n", 30 | "import pandas as pd\n", 31 | "from pathlib import Path\n", 32 | "\n", 33 | "INPUT_CSV_PATH = Path(os.getcwd()) / \"temp/sec-10-q/data/raw_questions/questions_mini.csv\"\n", 34 | "EVAL_NAME = INPUT_CSV_PATH.stem + \"_\" + datetime.now().strftime(\"%Y-%m-%d\")\n", 35 | "OUTPUT_CSV_PATH = INPUT_CSV_PATH.with_name(EVAL_NAME + \"_answers\" + INPUT_CSV_PATH.suffix)" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "## Create Docugami Agent" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 6, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "from docugami_kg_rag.agent import agent as docugami_agent\n", 52 | "from langchain_core.messages import HumanMessage\n", 53 | "\n", 54 | "\n", 55 | "def predict_docugami_agent(question: str) -> str:\n", 56 | " return docugami_agent.invoke(\n", 57 | " {\n", 58 | " \"messages\": [HumanMessage(content=question)],\n", 59 | " }\n", 60 | " )" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": 7, 66 | "metadata": {}, 67 | "outputs": [ 68 | { 69 | "name": "stdout", 70 | "output_type": "stream", 71 | "text": [ 72 | "Output CSV created at: /root/Source/github/langchain-template-docugami-kg-rag/notebooks/temp/sec-10-q/data/raw_questions/questions_mini_2024-03-28_answers.csv\n" 73 | ] 74 | } 75 | ], 76 | "source": [ 77 | "os.environ['LANGCHAIN_PROJECT'] = EVAL_NAME\n", 78 | "\n", 79 | "# Eval the CSV\n", 80 | "df = pd.read_csv(INPUT_CSV_PATH)\n", 81 | "if \"Question\" in df.columns:\n", 82 | " # Apply the predict function to each question and create a new column for the answers\n", 83 | " df[\"Answer\"] = df[\"Question\"].apply(predict_docugami_agent)\n", 84 | " # Write the dataframe with questions and answers to the output CSV\n", 85 | " df.to_csv(OUTPUT_CSV_PATH, index=False)\n", 86 | " print(f\"Output CSV created at: {OUTPUT_CSV_PATH}\")\n", 87 | "else:\n", 88 | " print(\"Error: The 'Question' column does not exist in the input CSV.\")" 89 | ] 90 | } 91 | ], 92 | "metadata": { 93 | "kernelspec": { 94 | "display_name": "docugami-kg-rag-sMPCFT4i-py3.9", 95 | "language": "python", 96 | "name": "python3" 97 | }, 98 | "language_info": { 99 | "codemirror_mode": { 100 | "name": "ipython", 101 | "version": 3 102 | }, 103 | "file_extension": ".py", 104 | "mimetype": "text/x-python", 105 | "name": "python", 106 | "nbconvert_exporter": "python", 107 | "pygments_lexer": "ipython3", 108 | "version": "3.9.16" 109 | } 110 | }, 111 | "nbformat": 4, 112 | "nbformat_minor": 2 113 | } 114 | -------------------------------------------------------------------------------- /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Docugami 2 | 3 | Hi there! Thank you for even being interested in contributing to Docugami's langchain-template-docugami-kg-rag. 
4 | As an open-source project in a rapidly developing field, we are extremely open to contributions, whether they involve new features, improved infrastructure, better documentation, or bug fixes. 5 | 6 | ## 🗺️ Guidelines 7 | 8 | ### 👩‍💻 Contributing Code 9 | 10 | To contribute to this project, please follow the ["fork and pull request"](https://docs.github.com/en/get-started/quickstart/contributing-to-projects) workflow. 11 | Please do not try to push directly to this repo unless you are a maintainer. 12 | 13 | Please follow the checked-in pull request template when opening pull requests. Note related issues and tag relevant 14 | maintainers. 15 | 16 | Pull requests cannot land without passing the formatting, linting, and testing checks first. See [Testing](#testing) and 17 | [Formatting and Linting](#formatting-and-linting) for how to run these checks locally. 18 | 19 | If there's something you'd like to add or change, opening a pull request is the 20 | best way to get our attention. 21 | 22 | ### 🚩GitHub Issues 23 | 24 | Our [issues](https://github.com/docugami/langchain-template-docugami-kg-rag/issues) page is kept up to date with bugs, improvements, and feature requests. 25 | 26 | If you start working on an issue, please assign it to yourself. 27 | 28 | If you are adding an issue, please try to keep it focused on a single, modular bug/improvement/feature. 29 | If two issues are related, or blocking, please link them rather than combining them. 30 | 31 | We will try to keep these issues as up-to-date as possible, though 32 | with the rapid rate of development in this field some may get out of date. 33 | If you notice this happening, please let us know. 34 | 35 | ### 🙋Getting Help 36 | 37 | Our goal is to have the simplest developer setup possible. Should you experience any difficulty getting setup, please 38 | contact a maintainer! Not only do we want to help get you unblocked, but we also want to make sure that the process is 39 | smooth for future contributors. 40 | 41 | In a similar vein, we do enforce certain linting, formatting, and documentation standards in the codebase. 42 | If you are finding these difficult (or even just annoying) to work with, feel free to contact a maintainer for help - 43 | we do not want these to get in the way of getting good code into the codebase. 44 | 45 | ### Local Development Dependencies 46 | 47 | Install langchain-template-docugami-kg-rag development requirements (for running langchain-template-docugami-kg-rag, running examples, linting, formatting, tests, and coverage): 48 | 49 | ```bash 50 | poetry install 51 | ``` 52 | 53 | Then verify dependency installation: 54 | 55 | ```bash 56 | make test 57 | ``` 58 | 59 | ### Testing 60 | 61 | Unit tests cover modular logic that does not require calls to outside APIs. 62 | If you add new logic, please add a unit test. 63 | 64 | To run unit tests: 65 | 66 | ```bash 67 | make test 68 | ``` 69 | 70 | ### Formatting and Linting 71 | 72 | Run these locally before submitting a PR; the CI system will check also. 73 | 74 | #### Code Formatting 75 | 76 | Formatting for this project is done via [ruff](https://docs.astral.sh/ruff/rules/). 77 | 78 | To run formatting for docs, cookbook and templates: 79 | 80 | ```bash 81 | make format 82 | ``` 83 | 84 | #### Linting 85 | 86 | Linting for this project is done via a combination of [ruff](https://docs.astral.sh/ruff/rules/) and [mypy](http://mypy-lang.org/). 
87 | 88 | To run linting for docs, cookbook and templates: 89 | 90 | ```bash 91 | make lint 92 | ``` 93 | 94 | We recognize linting can be annoying - if you do not want to do it, please contact a project maintainer, and they can help you with it. We do not want this to be a blocker for good code getting contributed. 95 | -------------------------------------------------------------------------------- /docugami_kg_rag/green_examples/describe_document_set_examples.yaml: -------------------------------------------------------------------------------- 1 | - summaries: | 2 | **************** 3 | DOCUMENT SUMMARY: 4 | 5 | On November 21, 2007, at 0001 UTC, a Boeing B737-400, registered under DCA08WA016, experienced an incident near Jakarta, Indonesia. The aircraft, operated by Batavia Airlines, was forced to return to Jakarta-Sukarno-Hatta airport 6 | following a loss of hydraulic pressure. Minor damage was reported, including a missing upper wing panel and damaged adjacent hydraulic tubing. The incident occurred shortly after routine maintenance, where some fasteners were not 7 | reinstalled correctly. There were no injuries to the crew or passengers, but the aircraft sustained minor damage. The flight was conducted under non-U.S., commercial regulations. 8 | 9 | The National Transportation Safety Committee of Indonesia is leading the investigation, with the Ministry of Communications of the Republic of Indonesia providing oversight. The aircraft was not amateur-built, had its last 10 | inspection noted without specifying the date, and was certified for commercial operations without any operating certificates held. Meteorological conditions at the time of the incident were visual, with the flight plan filed 11 | under IFR conditions. The departure point was Jakarta, with the destination unknown. 12 | 13 | Airport information indicates the use of a runway compatible with IFR approaches, and wreckage examination confirmed the minor damage to the aircraft without any fire or explosion. There were no injuries on the ground. The 14 | investigation docket is managed by the NTSB, with William English serving as the Investigator in Charge (IIC). Public access to investigation dockets prior to June 1, 2009, is available through NTSB's Record Management Division, 15 | with subsequent dockets accessible online at the NTSB's official docket management system. 16 | 17 | **************** 18 | DOCUMENT SUMMARY: 19 | 20 | National Transportation Safety Board Aviation Accident Factual Report details an accident involving a Cirrus Design Corp. SR-20, registration N141CD, which occurred on November 28, 2007, at 1800 UTC near Jagel, Germany. The aircraft 21 | was destroyed after striking power lines and impacting terrain during an approach for landing, resulting in the fatal injury of the private pilot, the sole occupant. The flight, originating in the Netherlands and destined for Sylt, 22 | Germany, was conducted under Part 91: General Aviation as a ferry flight. Instrument meteorological conditions prevailed at the time of the accident. 23 | 24 | The aircraft, manufactured by Cirrus Design Corporation with serial number 1032, sustained substantial damage. The investigation is under the jurisdiction of the German Federal Bureau of Aircraft Accidents Investigation (BFU), 25 | located in Braunschweig, Germany. Information regarding the pilot's certification, ratings, and medical certification is included but specifics are not detailed in the summary. 
The aircraft and owner/operator information, 26 | along with meteorological conditions, flight plan, airport information, and wreckage and impact information, are documented, highlighting the substantial aircraft damage and the fatal injury to the pilot. 27 | 28 | The report mentions that the investigation docket is maintained by the NTSB, with Investigator in Charge (IIC) David C. Bowling and additional participation from Karsten Severin of the German BFU. The accident and incident 29 | dockets are available for public access, providing archival information for NTSB investigations. 30 | 31 | **************** 32 | DOCUMENT SUMMARY: 33 | 34 | The National Transportation Safety Board (NTSB) issued a final report for aviation accident number SEA08CA039, which occurred in Yelm, WA, on November 24, 2007, at 1030 PST. The accident involved a Piper PA-18A-135, registration 35 | N3250B, which sustained substantial damage during landing on a gravel river bar. The pilot, a 37-year-old male with a private certificate and 228 hours of total flight time, failed to maintain directional control during the 36 | landing roll, resulting in the aircraft impacting a log adjacent to the landing area. There were no mechanical anomalies found during the post-accident inspection. The pilot was the only person on board and sustained no injuries. 37 | The flight was conducted under Part 91: General Aviation for personal reasons, with no flight plan filed. 38 | 39 | The probable cause of the accident was identified as the pilot's failure to maintain directional control of the tailwheel-equipped airplane during the landing roll. Contributing factors included the rough/uneven terrain and 40 | the presence of a log. The report also provided detailed information on the pilot's qualifications, aircraft data, meteorological conditions, and administrative details, including the investigator in charge, Joshua D Cawthra. 41 | The NTSB's investigation aims to improve transportation safety by determining the causes of accidents and issuing recommendations to prevent future incidents. 42 | docset_name: NTSB Accident Report 43 | description: | 44 | This document type is a detailed report issued by the National Transportation Safety Board (NTSB) regarding aviation accidents or incidents. It includes information about the circumstances, causes, and findings of the accident 45 | or incident, as well as details about the aircraft, individuals involved, and the investigation process. 
46 | -------------------------------------------------------------------------------- /docugami_kg_rag/agent.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import sys 3 | from typing import List, Optional, Union 4 | 5 | from docugami_langchain.agents import AgentState, ReActAgent 6 | from docugami_langchain.chains.rag.standalone_question_chain import StandaloneQuestionChain 7 | from docugami_langchain.history import get_chat_history_from_messages, get_question_from_messages 8 | from docugami_langchain.tools.common import BaseDocugamiTool, get_generic_tools 9 | from docugami_langchain.tools.reports import get_retrieval_tool_for_report 10 | from docugami_langchain.tools.retrieval import get_retrieval_tool_for_docset 11 | from langchain_core.documents import Document 12 | from langchain_core.messages import AIMessage, HumanMessage 13 | from langchain_core.pydantic_v1 import BaseModel, Field 14 | from langchain_core.runnables import Runnable, RunnableLambda 15 | 16 | from docugami_kg_rag.config import ( 17 | DEFAULT_USE_CONVERSATIONAL_TOOLS, 18 | DEFAULT_USE_REPORTS, 19 | EMBEDDINGS, 20 | EXAMPLES_PATH, 21 | LARGE_CONTEXT_INSTRUCT_LLM, 22 | RETRIEVER_K, 23 | SMALL_CONTEXT_INSTRUCT_LLM, 24 | SQL_GEN_LLM, 25 | get_vector_store_index, 26 | ) 27 | from docugami_kg_rag.indexing import read_all_local_index_state 28 | 29 | 30 | def build_tools( 31 | use_reports: bool = DEFAULT_USE_REPORTS, 32 | use_conversation_tools: bool = DEFAULT_USE_CONVERSATIONAL_TOOLS, 33 | ) -> List[BaseDocugamiTool]: 34 | """ 35 | Build retrieval tools. 36 | """ 37 | 38 | local_state = read_all_local_index_state() 39 | 40 | tools: List[BaseDocugamiTool] = [] 41 | for docset_id in local_state: 42 | docset_state = local_state[docset_id] 43 | chunk_vectorstore = get_vector_store_index(docset_id, EMBEDDINGS) 44 | 45 | if chunk_vectorstore is not None: 46 | 47 | def _fetch_parent_doc_callback(key: str) -> Optional[str]: 48 | results = docset_state.chunks_by_id.mget([key]) 49 | if results and results[0]: 50 | first_result: Document = results[0] 51 | return first_result.page_content 52 | return None 53 | 54 | def _fetch_full_doc_summary_callback(key: str) -> Optional[str]: 55 | results = docset_state.full_doc_summaries_by_id.mget([key]) 56 | if results and results[0]: 57 | first_result: Document = results[0] 58 | return first_result.page_content 59 | return None 60 | 61 | direct_retrieval_tool = get_retrieval_tool_for_docset( 62 | chunk_vectorstore=chunk_vectorstore, 63 | retrieval_tool_function_name=docset_state.retrieval_tool_function_name, 64 | retrieval_tool_description=docset_state.retrieval_tool_description, 65 | llm=LARGE_CONTEXT_INSTRUCT_LLM, 66 | embeddings=EMBEDDINGS, 67 | fetch_parent_doc_callback=_fetch_parent_doc_callback, 68 | fetch_full_doc_summary_callback=_fetch_full_doc_summary_callback, 69 | retrieval_k=RETRIEVER_K, 70 | ) 71 | if direct_retrieval_tool: 72 | # Direct retrieval tool for each indexed docset (direct KG-RAG against semantic XML) 73 | tools.append(direct_retrieval_tool) 74 | 75 | if use_reports: 76 | for report in docset_state.reports: 77 | # Report retrieval tool for each published report (user-curated views on semantic XML) 78 | report_retrieval_tool = get_retrieval_tool_for_report( 79 | local_xlsx_path=report.local_xlsx_path, 80 | report_name=report.name, 81 | retrieval_tool_function_name=report.retrieval_tool_function_name, 82 | retrieval_tool_description=report.retrieval_tool_description, 83 | sql_llm=SQL_GEN_LLM, 84 | 
general_llm=LARGE_CONTEXT_INSTRUCT_LLM, 85 | embeddings=EMBEDDINGS, 86 | sql_fixup_examples_file=EXAMPLES_PATH / "sql_fixup_examples.yaml", 87 | sql_examples_file=EXAMPLES_PATH / "sql_examples.yaml", 88 | ) 89 | if report_retrieval_tool: 90 | tools.append(report_retrieval_tool) 91 | 92 | if use_conversation_tools: 93 | tools += get_generic_tools( 94 | llm=SMALL_CONTEXT_INSTRUCT_LLM, 95 | embeddings=EMBEDDINGS, 96 | answer_examples_file=EXAMPLES_PATH / "answer_examples.yaml", 97 | ) 98 | 99 | return tools 100 | 101 | 102 | class AgentInput(BaseModel): 103 | messages: List[Union[HumanMessage, AIMessage]] = Field( 104 | ..., 105 | description="The chat messages representing the current conversation.", 106 | ) 107 | 108 | 109 | def agent_output_to_string(state: AgentState) -> str: 110 | if state: 111 | streaming_output = state.get("generate_re_act") 112 | if streaming_output: 113 | state = streaming_output # type: ignore 114 | 115 | cited_answer = state.get("cited_answer") 116 | if cited_answer and cited_answer.is_final: 117 | return cited_answer.answer 118 | 119 | return "" 120 | 121 | 122 | def build_agent_runnable() -> Runnable: 123 | standalone_questions_chain = StandaloneQuestionChain( 124 | llm=LARGE_CONTEXT_INSTRUCT_LLM, 125 | embeddings=EMBEDDINGS, 126 | ) 127 | standalone_questions_chain.load_examples(EXAMPLES_PATH / "standalone_question_examples.yaml") 128 | 129 | return ( 130 | { 131 | "question": lambda x: get_question_from_messages(x["messages"]), 132 | "chat_history": lambda x: get_chat_history_from_messages(x["messages"]), 133 | } 134 | | ReActAgent( 135 | llm=LARGE_CONTEXT_INSTRUCT_LLM, 136 | embeddings=EMBEDDINGS, 137 | tools=build_tools(), 138 | standalone_question_chain=standalone_questions_chain, 139 | ).runnable() 140 | | RunnableLambda(agent_output_to_string) 141 | ).with_types( 142 | input_type=AgentInput, # type: ignore 143 | ) 144 | 145 | 146 | agent = build_agent_runnable() 147 | 148 | if __name__ == "__main__": 149 | if sys.gettrace(): 150 | # This code will only run if a debugger is attached 151 | 152 | async def test_async_stream(msg: str) -> str: 153 | output = "" 154 | async for s in agent.astream_log( 155 | { 156 | "messages": [HumanMessage(content=msg)], 157 | } 158 | ): 159 | print(s) 160 | 161 | return output 162 | 163 | output = asyncio.run(test_async_stream("Hello!")) 164 | print(output) 165 | -------------------------------------------------------------------------------- /docugami_kg_rag/green_examples/sql_fixup_examples.yaml: -------------------------------------------------------------------------------- 1 | - table_info: | 2 | CREATE TABLE "Service Agreements Summary" ( 3 | "File" TEXT, 4 | "Link to Document" TEXT, 5 | "Workers Compensation Insurance" TEXT, 6 | "Limit" TEXT, 7 | "Commercial General Liability Insurance" TEXT, 8 | "Technology Professional Liability Errors Omissions Policy" TEXT, 9 | "Excess Liability Umbrella Coverage" TEXT, 10 | "Services Agreement Date" TEXT, 11 | "Completion of the Services by Company Date" TEXT, 12 | "Charge" TEXT, 13 | "Initial Payment" TEXT, 14 | "Continue in Force And Effect Date" TEXT, 15 | "Non-refundable" TEXT, 16 | "Company" TEXT, 17 | "Client" TEXT, 18 | "Client Address" TEXT, 19 | "Magicsoft" TEXT, 20 | "Except" TEXT, 21 | "A Party" TEXT, 22 | "A Daily Basis" TEXT, 23 | "The Initial Understanding" TEXT, 24 | "Signature Page" TEXT 25 | ) 26 | sql_query: SELECT "File", "Link to Document", "Workers Compensation Insurance", "Limit", "Commercial General Liability Insurance", "Technology Professional Liability 
Errors Omissions Policy", "Excess Liability Umbrella Coverage", "Services Agreement Date", "Completion of the Services by Company Date", "Charge", "Initial Payment", "Continue in Force And Effect Date", "Non-refundable", "Company", "Client", "Client Address", "Magicsoft", "Except", "A Party", " 27 | exception: |- 28 | (pysqlite3.dbapi2.OperationalError) unrecognized token: """ 29 | [SQL: SELECT "File", "Link to Document", "Workers Compensation Insurance", "Limit", "Commercial General Liability Insurance", "Technology Professional Liability Errors Omissions Policy", "Excess Liability Umbrella Coverage", "Services Agreement Date", "Completion of the Services by Company Date", "Charge", "Initial Payment", "Continue in Force And Effect Date", "Non-refundable", "Company", "Client", "Client Address", "Magicsoft", "Except", "A Party", "] 30 | (Background on this error at: https://sqlalche.me/e/20/e3q8) 31 | fixed_sql_query: SELECT "File", "Link to Document", "Workers Compensation Insurance", "Limit", "Commercial General Liability Insurance", "Technology Professional Liability Errors Omissions Policy", "Excess Liability Umbrella Coverage", "Services Agreement Date", "Completion of the Services by Company Date", "Charge" FROM "Service Agreements Summary" 32 | 33 | - table_info: | 34 | CREATE TABLE Users ( 35 | "UserID" INTEGER PRIMARY KEY, 36 | "Username" TEXT, 37 | "Email" TEXT, 38 | "Password" TEXT, 39 | "DateJoined" TEXT 40 | ) 41 | sql_query: SELECT "UserID", "Username", "Email", "Password" FROM User 42 | exception: | 43 | (pysqlite3.dbapi2.OperationalError) no such table: User 44 | fixed_sql_query: SELECT "UserID", "Username", "Email", "Password" FROM Users 45 | 46 | - table_info: | 47 | CREATE TABLE Orders ( 48 | "OrderID" INTEGER PRIMARY KEY, 49 | "Product" TEXT, 50 | "Quantity" INTEGER, 51 | "Price" REAL 52 | ) 53 | sql_query: SELECT "OrderID", "Product", "Quantity", "Price" FROM Orders WHERE "Quantity" > 5 AND "Price" > 54 | exception: | 55 | (pysqlite3.dbapi2.OperationalError) incomplete input 56 | fixed_sql_query: SELECT "OrderID", "Product", "Quantity", "Price" FROM Orders WHERE "Quantity" > 5 AND "Price" > 100 57 | 58 | - table_info: | 59 | CREATE TABLE Employees ( 60 | "EmployeeID" INTEGER PRIMARY KEY, 61 | "FirstName" TEXT, 62 | "LastName" TEXT, 63 | "Position" TEXT 64 | ) 65 | sql_query: SELECT "EmployeeID", "FirstName", "LastName", "Position" FROM Employees WHERE "Position" = 'Manager 66 | exception: | 67 | (pysqlite3.dbapi2.OperationalError) near "Manager": syntax error 68 | fixed_sql_query: SELECT "EmployeeID", "FirstName", "LastName", "Position" FROM Employees WHERE "Position" = 'Manager' 69 | 70 | - table_info: | 71 | CREATE TABLE Transactions ( 72 | "TransactionID" INTEGER PRIMARY KEY, 73 | "Amount" REAL, 74 | "Date" TEXT 75 | ) 76 | sql_query: SELECT "TransactionID", "Amount", "Date" FROM Transactions ORDERY BY "Amount" DESC 77 | exception: | 78 | (pysqlite3.dbapi2.OperationalError) near "ORDERY": syntax error 79 | fixed_sql_query: SELECT "TransactionID", "Amount", "Date" FROM Transactions ORDER BY "Amount" DESC 80 | 81 | - table_info: | 82 | CREATE TABLE Products ( 83 | "ProductID" INTEGER PRIMARY KEY, 84 | "ProductName" TEXT, 85 | "Category" TEXT 86 | ) 87 | sql_query: SELECT "ProductID", "ProductName", "Category" FROM Products WHERE "Category" IN ('Electronics', 'Clothing 88 | exception: | 89 | (pysqlite3.dbapi2.OperationalError) near "Clothing": syntax error 90 | fixed_sql_query: SELECT "ProductID", "ProductName", "Category" FROM Products WHERE "Category" IN 
('Electronics', 'Clothing') 91 | 92 | - table_info: | 93 | CREATE TABLE Customers ( 94 | "CustomerID" INTEGER PRIMARY KEY, 95 | "Name" TEXT, 96 | "Country" TEXT 97 | ) 98 | sql_query: SELECT "CustomerID", "Name", "Country" FROM Customers WHERE "Country" = 'USA' AND "Name" LIKE % John%' 99 | exception: | 100 | (pysqlite3.dbapi2.OperationalError) near "% John%": syntax error 101 | fixed_sql_query: SELECT "CustomerID", "Name", "Country" FROM Customers WHERE "Country" = 'USA' AND "Name" LIKE '%John%' 102 | 103 | - table_info: | 104 | CREATE TABLE Addresses ( 105 | "AddressID" INTEGER PRIMARY KEY, 106 | "Street" TEXT, 107 | "City" TEXT, 108 | "ZipCode" TEXT 109 | ) 110 | sql_query: SELECT "AddressID", "Street", "City", "ZipCode" FROM Addresses WHERE "ZipCode" = 12345 111 | exception: | 112 | (pysqlite3.dbapi2.OperationalError) datatype mismatch 113 | fixed_sql_query: SELECT "AddressID", "Street", "City", "ZipCode" FROM Addresses WHERE "ZipCode" = '12345' 114 | 115 | - table_info: | 116 | CREATE TABLE "Service Agreements Summary" ( 117 | "File" TEXT, 118 | "Link to Document" TEXT, 119 | "Workers Compensation Insurance" TEXT, 120 | "Limit" TEXT, 121 | "Commercial General Liability Insurance" TEXT, 122 | "Technology Professional Liability Errors Omissions Policy" TEXT, 123 | "Excess Liability Umbrella Coverage" TEXT, 124 | "Services Agreement Date" TEXT, 125 | "Completion of the Services by Company Date" TEXT, 126 | "Charge" TEXT, 127 | "Initial Payment" TEXT, 128 | "Continue in Force And Effect Date" TEXT, 129 | "Non-refundable" TEXT, 130 | "Company" TEXT, 131 | "Client" TEXT, 132 | "Client Address" TEXT, 133 | "Magicsoft" TEXT, 134 | "Except" TEXT, 135 | "A Party" TEXT, 136 | "A Daily Basis" TEXT, 137 | "The Initial Understanding" TEXT, 138 | "Signature Page" TEXT 139 | ) 140 | sql_query: SELECT Client, ExcessLiabilityUmbrellaCoverage FROM "Service Agreements Summary" ORDER BY ExcessLiabilityUmbrellaCoverage DESC LIMIT 1 141 | exception: |- 142 | (pysqlite3.dbapi2.OperationalError) no such column: ExcessLiabilityUmbrellaCoverage 143 | [SQL: SELECT Client, ExcessLiabilityUmbrellaCoverage FROM "Service Agreements Summary" ORDER BY ExcessLiabilityUmbrellaCoverage DESC LIMIT 1] 144 | (Background on this error at: https://sqlalche.me/e/20/e3q8) 145 | fixed_sql_query: SELECT "Client", "Excess Liability Umbrella Coverage" FROM "Service Agreements Summary" ORDER BY "Excess Liability Umbrella Coverage" DESC LIMIT 1 146 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # docugami-kg-rag 3 | 4 | This template contains a reference architecture for Retrieval Augmented Generation against a set of documents using Docugami's XML Knowledge Graph (KG-RAG). 5 | 6 | ## Video Walkthrough 7 | 8 | [![Docugami KG-RAG Walkthrough](https://img.youtube.com/vi/xOHOmL1NFMg/0.jpg)](https://www.youtube.com/watch?v=xOHOmL1NFMg) 9 | 10 | ## Setup 11 | 12 | ### Environment Setup 13 | 14 | You need to set some required environment variables before using your new app based on this template. These are used to index as well as run the application, and exceptions are raised if the following required environment variables are not set: 15 | 16 | 1. `OPENAI_API_KEY`: from the OpenAI platform. 17 | 1. `DOCUGAMI_API_KEY`: from the [Docugami Developer Playground](https://help.docugami.com/home/docugami-api) 18 | 19 | ```shell 20 | export OPENAI_API_KEY=... 21 | export DOCUGAMI_API_KEY=... 
22 | ``` 23 | 24 | Finally, make sure that you run `poetry install --all-extras` (or select a specific set of extras, see pyproject.toml) to install dependencies. 25 | 26 | ### Process Documents in Docugami (before you use this template) 27 | 28 | Before you use this template, you must have some documents already processed in Docugami. Here's what you need to get started: 29 | 30 | 1. Create a [Docugami workspace](https://app.docugami.com/) (free trials available) 31 | 1. Create an access token via the Developer Playground for your workspace. [Detailed instructions](https://help.docugami.com/home/docugami-api). 32 | 1. Add your documents to Docugami for processing. There are two ways to do this: 33 | - Upload via the simple Docugami web experience. [Detailed instructions](https://help.docugami.com/home/adding-documents). 34 | - Upload via the Docugami API, specifically the [documents](https://api-docs.docugami.com/#tag/documents/operation/upload-document) endpoint. Code samples are available for Python and JavaScript, or you can use the [docugami](https://pypi.org/project/docugami/) Python library. 35 | 36 | Once your documents are in Docugami, they are processed and organized into sets of similar documents, e.g. NDAs, Lease Agreements, and Service Agreements. Docugami is not limited to any particular types of documents, and the clusters created depend on your particular documents. You can [change the docset assignments](https://help.docugami.com/home/working-with-the-doc-sets-view) later if you wish. You can monitor file status in the simple Docugami webapp, or use a [webhook](https://api-docs.docugami.com/#tag/webhooks) to be informed when your documents are done processing. The [Docugami RAG over XML Knowledge Graphs (KG-RAG) Cookbook](https://github.com/langchain-ai/langchain/blob/master/cookbook/docugami_xml_kg_rag.ipynb) has end-to-end code to upload docs and wait for them to be processed, if you are interested. 37 | 38 | Once your documents are finished processing, you can index them in the following step. 39 | 40 | ## Usage 41 | 42 | ### Indexing 43 | 44 | Before you can run your app, you need to build your vector index. See [index.py](./index.py), which you can run via `poetry run python index.py` after setting the environment variables as specified above. The CLI will query docsets in the workspace corresponding to your `DOCUGAMI_API_KEY` and let you pick which one(s) you want to index. 45 | 46 | Indexing in this template uses the Docugami Loader for LangChain to create semantic chunks out of your documents. Refer to this [documentation](https://python.langchain.com/docs/integrations/document_loaders/docugami) for details. 47 | 48 | Note that if you previously ran indexing for the same docset, the index will not be recreated. If you want to force recreate the index (e.g.
if you have new docs in the docset or changed your chunking config parameters) please re-run `poetry run python index.py`. 49 | 50 | ### Creating app 51 | To use this package, you should first have the LangChain CLI installed: 52 | 53 | ```shell 54 | pip install -U langchain-cli 55 | ``` 56 | 57 | To create a new LangChain project and install this as the only package, you can do: 58 | 59 | ```shell 60 | langchain app new my-app --package git+https://github.com/docugami/langchain-template-docugami-kg-rag.git 61 | ``` 62 | 63 | If you want to add this to an existing project, you can just run: 64 | 65 | ```shell 66 | langchain app add git+https://github.com/docugami/langchain-template-docugami-kg-rag.git 67 | ``` 68 | 69 | And add the following code to your `server.py` file: 70 | ```python 71 | from docugami_kg_rag import agent as docugami_kg_rag_agent 72 | 73 | add_routes(app, docugami_kg_rag_agent, path="/docugami-kg-rag") 74 | ``` 75 | 76 | (Optional) Let's now configure LangSmith. 77 | LangSmith will help us trace, monitor, and debug LangChain applications. 78 | You can sign up [here](https://smith.langchain.com/). 79 | 80 | ```shell 81 | export LANGCHAIN_API_KEY= 82 | export LANGCHAIN_PROJECT= # if not specified, defaults to "default" 83 | ``` 84 | 85 | ### Running app 86 | If you are inside the app directory, then you can spin up a LangServe instance directly by: 87 | 88 | ```shell 89 | langchain serve 90 | ``` 91 | 92 | This will start the FastAPI app with a server running locally at 93 | [http://localhost:8000](http://localhost:8000) 94 | 95 | We can see all templates at [http://127.0.0.1:8000/docs](http://127.0.0.1:8000/docs) 96 | We can access the playground at [http://127.0.0.1:8000/docugami-kg-rag/playground](http://127.0.0.1:8000/docugami-kg-rag/playground) 97 | 98 | We can access the template from code with: 99 | 100 | ```python 101 | from langserve.client import RemoteRunnable 102 | 103 | runnable = RemoteRunnable("http://localhost:8000/docugami-kg-rag") 104 | ``` 105 | 106 | # Advanced Configuration 107 | 108 | ## Changing Default Configuration 109 | By default, this template uses OpenAI LLMs and Embeddings, plus ChromaDB as the vector database. This is specified in `docugami_kg_rag/config/__init__.py` in the following import lines at the top of the file: 110 | 111 | ```python 112 | from docugami_kg_rag.config.openai import * 113 | from docugami_kg_rag.config.chromadb import * 114 | ``` 115 | 116 | You can change these defaults by changing which config you import, e.g. you can import the `fireworksai_llama3.py` or `fireworksai_mixtral.py` config to run OSS models on Fireworks AI (or create your own config). 117 | 118 | ## Using Local GPU 119 | Optionally, if using local embeddings or LLMs, make sure your local CUDA runtime is updated. You can run `torch.cuda.is_available()` in a Python REPL to make sure, and if you need to install a specific version for your local CUDA driver you can run something like `poetry run pip3 install torch==1.13.1+cu117 --extra-index-url https://download.pytorch.org/whl/cu117` to update it. 120 | 121 | ## Using Redis 122 | 123 | If using Redis as the vector database in your config, see the LangChain documentation here: https://python.langchain.com/docs/integrations/vectorstores/redis 124 | 125 | One of the things you need to specify is the REDIS_URL.
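For example, for the local Redis Stack container started below, the connection URL would typically be set like this (a minimal sketch assuming the default port and no authentication; adjust host, port, and password for your own deployment):

```shell
# Assumes the redis-stack container shown below, listening on the default port with no password
export REDIS_URL="redis://localhost:6379"
```
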
You may have an instance already running that you can point to, or for development you may want to deploy Redis locally: 126 | 127 | `docker run -d -p 6379:6379 -p 8001:8001 redis/redis-stack:latest` 128 | 129 | See documentation [here](https://python.langchain.com/docs/integrations/vectorstores/redis#redis-connection-url-examples) for how to configure the REDIS_URL. 130 | 131 | ## Improving the template 132 | 133 | If working on the template, you can run `poetry run langchain template serve --chat-playground` to test improvements. Make sure you run the `/playground` route of the localhost endpoint created. -------------------------------------------------------------------------------- /docugami_kg_rag/indexing.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import random 4 | from pathlib import Path 5 | from typing import Dict, List, Optional 6 | 7 | import requests 8 | from docugami import Docugami 9 | from docugami_langchain.document_loaders.docugami import DocugamiLoader 10 | from docugami_langchain.retrievers.mappings import ( 11 | build_chunk_summary_mappings, 12 | build_doc_maps_from_chunks, 13 | build_full_doc_summary_mappings, 14 | ) 15 | from docugami_langchain.tools.reports import ( 16 | connect_to_db, 17 | excel_to_sqlite_connection, 18 | report_details_to_report_query_tool_description, 19 | report_name_to_report_query_tool_function_name, 20 | ) 21 | from docugami_langchain.tools.retrieval import ( 22 | docset_name_to_direct_retrieval_tool_function_name, 23 | summaries_to_direct_retrieval_tool_description, 24 | ) 25 | from langchain.storage.in_memory import InMemoryStore 26 | from langchain_core.documents import Document 27 | 28 | from docugami_kg_rag.config import ( 29 | DOCUGAMI_API_ENDPOINT, 30 | DOCUGAMI_API_KEY, 31 | EMBEDDINGS, 32 | EXAMPLES_PATH, 33 | INCLUDE_XML_TAGS, 34 | INDEXING_LOCAL_REPORT_DBS_ROOT, 35 | INDEXING_LOCAL_STATE_PATH, 36 | LARGE_CONTEXT_INSTRUCT_LLM, 37 | LLM_BATCH_SIZE, 38 | MAX_CHUNK_TEXT_LENGTH, 39 | MAX_FULL_DOCUMENT_TEXT_LENGTH, 40 | MIN_CHUNK_TEXT_LENGTH, 41 | MIN_LENGTH_TO_SUMMARIZE, 42 | PARENT_HIERARCHY_LEVELS, 43 | SMALL_CONTEXT_INSTRUCT_LLM, 44 | SUB_CHUNK_TABLES, 45 | del_vector_store_index, 46 | get_vector_store_index, 47 | init_vector_store_index, 48 | ) 49 | from docugami_kg_rag.state_models import LocalIndexState, ReportDetails 50 | 51 | HEADERS = {"Authorization": f"Bearer {DOCUGAMI_API_KEY}"} 52 | 53 | 54 | def read_all_local_index_state() -> Dict[str, LocalIndexState]: 55 | if not Path(INDEXING_LOCAL_STATE_PATH).is_file(): 56 | return {} # not found 57 | 58 | with open(INDEXING_LOCAL_STATE_PATH, "rb") as file: 59 | return pickle.load(file) 60 | 61 | 62 | def update_local_index( 63 | docset_id: str, 64 | full_doc_summaries_by_id: Dict[str, Document], 65 | chunks_by_id: Dict[str, Document], 66 | direct_tool_function_name: str, 67 | direct_tool_description: str, 68 | report_details: List[ReportDetails], 69 | ) -> None: 70 | """ 71 | Read and update local index 72 | """ 73 | 74 | state = read_all_local_index_state() 75 | 76 | full_doc_summaries_by_id_store = InMemoryStore() 77 | full_doc_summaries_by_id_store.mset(list(full_doc_summaries_by_id.items())) 78 | 79 | chunks_by_id_store = InMemoryStore() 80 | chunks_by_id_store.mset(list(chunks_by_id.items())) 81 | 82 | doc_index_state = LocalIndexState( 83 | full_doc_summaries_by_id=full_doc_summaries_by_id_store, 84 | chunks_by_id=chunks_by_id_store, 85 | retrieval_tool_function_name=direct_tool_function_name, 86 | 
retrieval_tool_description=direct_tool_description, 87 | reports=report_details, 88 | ) 89 | state[docset_id] = doc_index_state 90 | 91 | # Serialize state to disk (Deserialized in chain) 92 | store_local_path = Path(INDEXING_LOCAL_STATE_PATH) 93 | os.makedirs(os.path.dirname(store_local_path), exist_ok=True) 94 | with open(store_local_path, "wb") as file: 95 | pickle.dump(state, file) 96 | 97 | 98 | def populate_vector_index(docset_id: str, chunks: List[Document]) -> None: 99 | """ 100 | Create index if it does not exist, delete and recreate if already exists. 101 | """ 102 | 103 | vector_store = get_vector_store_index(docset_id, EMBEDDINGS) 104 | 105 | if vector_store is not None: 106 | print(f"Vector store index already exists for {docset_id}, existing index will be deleted and re-created") 107 | 108 | print(f"Embedding documents into vector store for {docset_id}...") 109 | 110 | vector_store = init_vector_store_index(docset_id, chunks, EMBEDDINGS) 111 | 112 | print(f"Done embedding documents into vector store for {docset_id}") 113 | 114 | 115 | def index_docset(docset_id: str, name: str) -> None: 116 | """ 117 | Indexes the given docset 118 | """ 119 | 120 | print(f"Indexing {name} (ID: {docset_id})") 121 | 122 | loader = DocugamiLoader( 123 | api=DOCUGAMI_API_ENDPOINT, 124 | docset_id=docset_id, 125 | file_paths=None, 126 | document_ids=None, 127 | min_text_length=MIN_CHUNK_TEXT_LENGTH, 128 | max_text_length=MAX_CHUNK_TEXT_LENGTH, # type: ignore 129 | sub_chunk_tables=SUB_CHUNK_TABLES, 130 | include_xml_tags=INCLUDE_XML_TAGS, 131 | parent_hierarchy_levels=PARENT_HIERARCHY_LEVELS, 132 | include_project_metadata_in_doc_metadata=False, # not used, so lighten the vector index 133 | ) 134 | 135 | chunks = loader.load() 136 | 137 | full_docs_by_id, parent_chunks_by_id = build_doc_maps_from_chunks(chunks) 138 | 139 | full_doc_summaries_by_id = build_full_doc_summary_mappings( 140 | docs_by_id=full_docs_by_id, 141 | llm=LARGE_CONTEXT_INSTRUCT_LLM, 142 | embeddings=EMBEDDINGS, 143 | min_length_to_summarize=MIN_LENGTH_TO_SUMMARIZE, 144 | max_length_cutoff=MAX_FULL_DOCUMENT_TEXT_LENGTH, 145 | include_xml_tags=INCLUDE_XML_TAGS, 146 | summarize_document_examples_file=EXAMPLES_PATH / "summarize_document_examples.yaml", 147 | batch_size=LLM_BATCH_SIZE, 148 | ) 149 | chunk_summaries_by_id = build_chunk_summary_mappings( 150 | docs_by_id=parent_chunks_by_id, 151 | llm=SMALL_CONTEXT_INSTRUCT_LLM, 152 | embeddings=EMBEDDINGS, 153 | min_length_to_summarize=MIN_LENGTH_TO_SUMMARIZE, 154 | max_length_cutoff=MAX_FULL_DOCUMENT_TEXT_LENGTH, 155 | include_xml_tags=INCLUDE_XML_TAGS, 156 | summarize_chunk_examples_file=EXAMPLES_PATH / "summarize_chunk_examples.yaml", 157 | batch_size=LLM_BATCH_SIZE, 158 | ) 159 | 160 | direct_tool_function_name = docset_name_to_direct_retrieval_tool_function_name(name) 161 | direct_tool_description = summaries_to_direct_retrieval_tool_description( 162 | name=name, 163 | summaries=random.sample( 164 | list(full_doc_summaries_by_id.values()), min(len(full_doc_summaries_by_id), 3) 165 | ), # give 3 randomly selected summaries summaries 166 | llm=SMALL_CONTEXT_INSTRUCT_LLM, 167 | embeddings=EMBEDDINGS, 168 | max_sample_documents_cutoff_length=MAX_FULL_DOCUMENT_TEXT_LENGTH, 169 | describe_document_set_examples_file=EXAMPLES_PATH / "describe_document_set_examples.yaml", 170 | ) 171 | report_details = build_report_details(docset_id) 172 | 173 | state = Path(INDEXING_LOCAL_STATE_PATH) 174 | if state.is_file() and state.exists(): 175 | os.remove(state) 176 | 177 | if 
get_vector_store_index(docset_id, EMBEDDINGS) is not None: 178 | del_vector_store_index(docset_id) 179 | 180 | update_local_index( 181 | docset_id=docset_id, 182 | full_doc_summaries_by_id=full_doc_summaries_by_id, 183 | chunks_by_id=parent_chunks_by_id, # we are using the parent chunks as chunks for expanded context 184 | direct_tool_function_name=direct_tool_function_name, 185 | direct_tool_description=direct_tool_description, 186 | report_details=report_details, 187 | ) 188 | 189 | populate_vector_index(docset_id, chunks=list(chunk_summaries_by_id.values())) 190 | 191 | 192 | def download_project_latest_xlsx(project_url: str, local_xlsx: Path) -> Optional[Path]: 193 | response = requests.request( 194 | "GET", 195 | project_url + "/artifacts/latest?name=spreadsheet.xlsx", 196 | headers=HEADERS, 197 | data={}, 198 | ) 199 | if response.ok: 200 | response_json = response.json()["artifacts"] 201 | xlsx_artifact = next( 202 | (item for item in response_json if str(item["name"]).lower().endswith(".xlsx")), 203 | None, 204 | ) 205 | if xlsx_artifact: 206 | artifact_id = xlsx_artifact["id"] 207 | response = requests.request( 208 | "GET", 209 | project_url + f"/artifacts/latest/{artifact_id}/content", 210 | headers=HEADERS, 211 | data={}, 212 | ) 213 | if response.ok: 214 | os.makedirs(str(local_xlsx.parent), exist_ok=True) 215 | with open(local_xlsx, "wb") as f: 216 | f.write(response.content) 217 | return local_xlsx 218 | else: 219 | raise Exception( 220 | f"Failed to download XLSX for {project_url}", 221 | ) 222 | return None # No artifacts found 223 | elif response.status_code == 404: 224 | # No artifacts found: this project has never been published 225 | return None 226 | else: 227 | raise Exception(f"Failed to download XLSX for {project_url}") 228 | 229 | 230 | def build_report_details(docset_id: str) -> List[ReportDetails]: 231 | docugami_client = Docugami() 232 | 233 | projects_response = docugami_client.projects.list() 234 | if not projects_response or not projects_response.projects: 235 | return [] # no projects found 236 | 237 | projects = [p for p in projects_response.projects if p.docset.id == docset_id] 238 | details: List[ReportDetails] = [] 239 | for project in projects: 240 | local_xlsx_path = download_project_latest_xlsx( 241 | project.url, Path(INDEXING_LOCAL_REPORT_DBS_ROOT) / f"{project.id}.xlsx" 242 | ) 243 | if local_xlsx_path: 244 | report_name = project.name or local_xlsx_path.name 245 | conn = excel_to_sqlite_connection(local_xlsx_path, report_name) 246 | db = connect_to_db(conn) 247 | table_info = db.get_table_info() 248 | details.append( 249 | ReportDetails( 250 | id=project.id, 251 | name=report_name, 252 | local_xlsx_path=local_xlsx_path, 253 | retrieval_tool_function_name=report_name_to_report_query_tool_function_name(project.name), 254 | retrieval_tool_description=report_details_to_report_query_tool_description(project.name, table_info), 255 | ) 256 | ) 257 | 258 | return details 259 | -------------------------------------------------------------------------------- /docugami_kg_rag/green_examples/summarize_document_examples.yaml: -------------------------------------------------------------------------------- 1 | - format: text 2 | contents: | 3 | National Transportation Safety Board Aviation Accident Final Report 4 | +-------------------------+---------------------------------------+-------------------+-------------+ 5 | | Location: | Talladega , AL | Accident Number: | MIA08CA030 | 6 | 
+-------------------------+---------------------------------------+-------------------+-------------+ 7 | | Date & Time: | 12/18/2007 , 0951 CST | Registration: | N2180E | 8 | +-------------------------+---------------------------------------+-------------------+-------------+ 9 | | Aircraft: | Cessna 172N | Aircraft Damage : | Substantial | 10 | +-------------------------+---------------------------------------+-------------------+-------------+ 11 | | Defining Event: | | Injuries: | 1 None | 12 | +-------------------------+---------------------------------------+-------------------+-------------+ 13 | | Flight Conducted Under: | Part 91 : General Aviation - Personal | | | 14 | +-------------------------+---------------------------------------+-------------------+-------------+ 15 | Analysis 16 | The student pilot stated that he entered left closed traffic for runway 21 at the Talladega Municipal Airport , Talladega , Alabama . He landed and started to prepare airplane for a touch- and-go. As he applied power he lost directional control of the airplane, went off the left side of the runway, and collided with a tree. The student pilot did not report any mechanical or flight control anomalies with the airplane. 17 | Probable Cause and Findings 18 | The National Transportation Safety Board determines the probable cause(s) of this accident to be: The pilot's failure to maintain directional control of the airplane. 19 | Findings 20 | Occurrence # 1 : LOSS OF CONTROL - ON GROUND /WATER 21 | Phase of Operation: LANDING - ROLL 22 | Findings 23 | 1 . (C) DIRECTIONAL CONTROL - NOT MAINTAINED - PILOT IN COMMAND 24 | • Occurrence # 2 : IN FLIGHT COLLISION WITH OBJECT Phase of Operation: LANDING - ROLL 25 | Findings 26 | 2 . OBJECT - TREE(S) 27 | Page 1 of 4 28 | Factual Information 29 | Student Pilot Information 30 | +---------------------------+----------------------------------------------------------------+-----------------------------------+----------------------------+ 31 | | Certificate: | Student | Age: | 47 , Male | 32 | +---------------------------+----------------------------------------------------------------+-----------------------------------+----------------------------+ 33 | | Airplane Rating(s): | Single-engine Land | Seat Occupied: | Left | 34 | +---------------------------+----------------------------------------------------------------+-----------------------------------+----------------------------+ 35 | | Other Aircraft Rating(s): | None | Restraint Used: | Seatbelt, Shoulder harness | 36 | +---------------------------+----------------------------------------------------------------+-----------------------------------+----------------------------+ 37 | | Instrument Rating(s): | None | Second Pilot Present : | No | 38 | +---------------------------+----------------------------------------------------------------+-----------------------------------+----------------------------+ 39 | | Instructor Rating(s): | None | Toxicology Performed: | No | 40 | +---------------------------+----------------------------------------------------------------+-----------------------------------+----------------------------+ 41 | | Medical Certification : | Class 3 None | Last FAA Medical Exam : | 06/01/2007 | 42 | +---------------------------+----------------------------------------------------------------+-----------------------------------+----------------------------+ 43 | | Occupational Pilot : | | Last Flight Review or Equivalent: | | 44 | 
+---------------------------+----------------------------------------------------------------+-----------------------------------+----------------------------+ 45 | | Flight Time: | 37 hours (Total, all aircraft), 37 hours (Total, this make and | | | 46 | | | model), 10 hours (Pilot In Command, all aircraft), 24 hours | | | 47 | | | (Last 90 days , all aircraft), 14 hours (Last 30 days , all | | | 48 | | | aircraft) | | | 49 | +---------------------------+----------------------------------------------------------------+-----------------------------------+----------------------------+ 50 | Aircraft and Owner/Operator Information 51 | +-------------------------------+----------------------------------+--------------------------------+-----------------+ 52 | | Aircraft Make : | Cessna | Registration: | N2180E | 53 | +-------------------------------+----------------------------------+--------------------------------+-----------------+ 54 | | Model/Series: | 172N | Aircraft Category : | Airplane | 55 | +-------------------------------+----------------------------------+--------------------------------+-----------------+ 56 | | Year of Manufacture: | | Amateur Built: | No | 57 | +-------------------------------+----------------------------------+--------------------------------+-----------------+ 58 | | Airworthiness Certificate : | Normal | Serial Number: | 172 - 71159 | 59 | +-------------------------------+----------------------------------+--------------------------------+-----------------+ 60 | | Landing Gear Type : | Tricycle | Seats: | 4 | 61 | +-------------------------------+----------------------------------+--------------------------------+-----------------+ 62 | | Date/Type of Last Inspection: | 07/01/2007 , Annual | Certified Max Gross Wt .: | 2300 lbs | 63 | +-------------------------------+----------------------------------+--------------------------------+-----------------+ 64 | | Time Since Last Inspection: | 0 Hours | Engines: | 1 Reciprocating | 65 | +-------------------------------+----------------------------------+--------------------------------+-----------------+ 66 | | Airframe Total Time : | 4937 Hours at time of accident | Engine Manufacturer : | Lycoming | 67 | +-------------------------------+----------------------------------+--------------------------------+-----------------+ 68 | | ELT: | Installed, not activated | Engine Model/Series: | O-320-HAD | 69 | +-------------------------------+----------------------------------+--------------------------------+-----------------+ 70 | | Registered Owner : | Redstone Arsenal Flying Activity | Rated Power : | 160 hp | 71 | +-------------------------------+----------------------------------+--------------------------------+-----------------+ 72 | | Operator: | Redstone Arsenal Flying Activity | Operating Certificate(s) Held: | None | 73 | +-------------------------------+----------------------------------+--------------------------------+-----------------+ 74 | Meteorological Information and Flight Plan 75 | +-----------------------------------+----------------------------------+--------------------------------------+------------------+ 76 | | Conditions at Accident Site : | Visual Conditions | Condition of Light: | Day | 77 | +-----------------------------------+----------------------------------+--------------------------------------+------------------+ 78 | | Observation Facility , Elevation: | KAFN, 509 ft msl | Distance from Accident Site : | 0 Nautical Miles | 79 | 
+-----------------------------------+----------------------------------+--------------------------------------+------------------+ 80 | | Observation Time : | 0855 CST | Direction from Accident Site : | 0 ° | 81 | +-----------------------------------+----------------------------------+--------------------------------------+------------------+ 82 | | Lowest Cloud Condition : | Clear | Visibility | 10 Miles | 83 | +-----------------------------------+----------------------------------+--------------------------------------+------------------+ 84 | | Lowest Ceiling: | None | Visibility (RVR): | | 85 | +-----------------------------------+----------------------------------+--------------------------------------+------------------+ 86 | | Wind Speed/Gusts: | 8 knots / | Turbulence Type Forecast/Actual: | / | 87 | +-----------------------------------+----------------------------------+--------------------------------------+------------------+ 88 | | Wind Direction : | 170 ° | Turbulence Severity Forecast/Actual: | / | 89 | +-----------------------------------+----------------------------------+--------------------------------------+------------------+ 90 | | Altimeter Setting: | 30.2 inches Hg | Temperature/Dew Point: | 5 °C / 15°C | 91 | +-----------------------------------+----------------------------------+--------------------------------------+------------------+ 92 | | Precipitation and Obscuration: | No Obscuration; No Precipitation | | | 93 | +-----------------------------------+----------------------------------+--------------------------------------+------------------+ 94 | | Departure Point : | Talladega, AL (HUA) | Type of Flight Plan Filed: | None | 95 | +-----------------------------------+----------------------------------+--------------------------------------+------------------+ 96 | | Destination: | JAFFREY, NH (AFN) | Type of Clearance: | None | 97 | +-----------------------------------+----------------------------------+--------------------------------------+------------------+ 98 | | Departure Time : | 0855 CST | Type of Airspace: | | 99 | +-----------------------------------+----------------------------------+--------------------------------------+------------------+ 100 | Airport Information 101 | +----------------------+-----------------------+----------------------------+-----------+ 102 | | Airport: | TALLADEGA MUNI (ASN ) | Runway Surface Type : | Asphalt | 103 | +----------------------+-----------------------+----------------------------+-----------+ 104 | | Airport Elevation : | 529 ft | Runway Surface Condition : | Dry | 105 | +----------------------+-----------------------+----------------------------+-----------+ 106 | | Runway Used: | 21 | IFR Approach : | Visual | 107 | +----------------------+-----------------------+----------------------------+-----------+ 108 | | Runway Length/Width: | 6001 ft / 100 ft | VFR Approach/Landing: | Full Stop | 109 | +----------------------+-----------------------+----------------------------+-----------+ 110 | Wreckage and Impact Information 111 | +----------------------+--------+----------------------+------------------------+ 112 | | Crew Injuries : | 1 None | Aircraft Damage : | Substantial | 113 | +----------------------+--------+----------------------+------------------------+ 114 | | Passenger Injuries : | N/A | Aircraft Fire : | None | 115 | +----------------------+--------+----------------------+------------------------+ 116 | | Ground Injuries : | N/A | Aircraft Explosion : | None | 117 | 
+----------------------+--------+----------------------+------------------------+ 118 | | Total Injuries : | 1 None | Latitude, Longitude: | 33.568611 , -86.050556 | 119 | +----------------------+--------+----------------------+------------------------+ 120 | Administrative Information 121 | +------------------------------------+------------------------------------------------------------------+---------------+------------+ 122 | | Investigator In Charge (IIC): | Eric H Alleyne | Report Date : | 03/31/2008 | 123 | +------------------------------------+------------------------------------------------------------------+---------------+------------+ 124 | | Additional Participating Persons : | James West ; Birmingham FSDO-09 ; Birmingham, AL | | | 125 | +------------------------------------+------------------------------------------------------------------+---------------+------------+ 126 | | Publish Date : | | | | 127 | +------------------------------------+------------------------------------------------------------------+---------------+------------+ 128 | | Note: Investigation Docket : | This accident report documents the factual circumstances of this | | | 129 | | | accident as described to the NTSB . NTSB accident and incident | | | 130 | | | dockets serve as permanent archival information for the NTSB ’s | | | 131 | | | investigations. Dockets released prior to June 1, 2009 are | | | 132 | | | publicly available from the NTSB’s Record Management Division at | | | 133 | | | pubinq@ntsb.gov , or at 800 - 877 - 6799 . Dockets released | | | 134 | | | after this date are available at http://dms.ntsb.gov/pubdms/. | | | 135 | +------------------------------------+------------------------------------------------------------------+---------------+------------+ 136 | The National Transportation Safety Board (NTSB ), established in 1967 , is an independent federal agency mandated by Congress through the Independent Safety Board Act of 1974 to investigate transportation accidents, determine the probable causes of the accidents, issue safety recommendations, study transportation safety issues, and evaluate the safety effectiveness of government agencies involved in transportation. The NTSB makes public its actions and decisions through accident reports, safety studies, special investigation reports, safety recommendations, and statistical reviews. 137 | The Independent Safety Board Act , as codified at 49 U.S.C. Section 1154(b), precludes the admission into evidence or use of any part of an NTSB report related to an incident or accident in a civil action for damages resulting from a matter mentioned in the report. A factual report that may be admissible under 49 U.S.C. § 1154(b) is available here. 138 | summary: | 139 | National Transportation Safety Board Aviation Accident Final Report Summary 140 | 141 | Basic Information: 142 | 143 | - Location: Talladega, AL 144 | - Date & Time: 12/18/2007, 0951 CST 145 | - Accident Number: MIA08CA030 146 | - Aircraft: Cessna 172N, Registration N2180E 147 | - Injuries: 1 None 148 | - Aircraft Damage: Substantial 149 | 150 | Analysis: 151 | The student pilot lost directional control during a touch-and-go on runway 21 at Talladega Municipal Airport, resulting in a collision with a tree. No mechanical issues were reported. 152 | 153 | Probable Cause: 154 | Failure to maintain directional control by the pilot. 
155 | 156 | Aircraft and Operator Information: 157 | 158 | - Make/Model: Cessna 172N 159 | - Owner/Operator: Redstone Arsenal Flying Activity 160 | - Airworthiness: Normal 161 | - Engine: Lycoming O-320-HAD, 160 hp 162 | - Meteorological Information: 163 | - Visual conditions, clear skies, 10 miles visibility, and wind from 170° at 8 knots. 164 | 165 | Flight Plan: 166 | No flight plan filed, departure from Talladega, AL, intended for JAFFREY, NH. 167 | 168 | Airport Information: 169 | Talladega Muni (ASN), runway 21, asphalt, dry, 6001 ft long. 170 | 171 | Wreckage and Impact Information: 172 | The accident resulted in substantial aircraft damage but no fire or explosion. 173 | 174 | Administrative Information: 175 | - Investigator In Charge: Eric H. Alleyne 176 | - Report Date: 03/31/2008 177 | 178 | NTSB Overview: 179 | Established in 1967, the NTSB investigates transportation accidents, determines probable causes, issues safety recommendations, and evaluates the safety effectiveness of transportation entities. 180 | -------------------------------------------------------------------------------- /docugami_kg_rag/green_examples/sql_examples.yaml: -------------------------------------------------------------------------------- 1 | - question: When was the cardiva medical charter filed? 2 | table_info: | 3 | CREATE TABLE "Corporate Charters" ( 4 | "File" TEXT, 5 | "Link to Document" TEXT, 6 | "FILED Date" TEXT, 7 | "FILED Time" TEXT, 8 | "SR" REAL, 9 | "FileNumber" REAL, 10 | "Corporation Name" TEXT, 11 | "Registered Address" TEXT, 12 | "Shares of Common Stock" INTEGER, 13 | "Shares of Preferred Stock" INTEGER 14 | ) 15 | sql_query: SELECT "Corporation Name", "FILED Date" FROM "Corporate Charters" WHERE LOWER("Corporation Name") LIKE "%cardiva medical%"; 16 | sql_result: "[('Cardiva Medical, Inc.', '12/19/2017')]" 17 | answer: The cardiva medical charter was filed on 12/19/2017. 18 | query_explanation: "Retrieve the 'FILED Date' for the corporation with a name similar to 'cardiva medical'." 19 | 20 | - question: How many shares of common stock for Aisera, Inc? 21 | table_info: | 22 | CREATE TABLE "Corporate Charters" ( 23 | "File" TEXT, 24 | "Link to Document" TEXT, 25 | "FILED Date" TEXT, 26 | "FILED Time" TEXT, 27 | "SR" REAL, 28 | "FileNumber" REAL, 29 | "Corporation Name" TEXT, 30 | "Registered Address" TEXT, 31 | "Shares of Common Stock" INTEGER, 32 | "Shares of Preferred Stock" INTEGER 33 | ) 34 | sql_query: SELECT "Shares of Common Stock" FROM "Corporate Charters" WHERE LOWER("Corporation Name") LIKE "%aisera%"; 35 | sql_result: "[(90000000,)]" 36 | answer: Aisera, Inc. issued 90 million shares of common stock. 37 | query_explanation: "Retrieve the number of 'Shares of Common Stock' for the corporation with a name similar to 'Aisera, Inc'." 38 | 39 | - question: What is the average number of shares of common stock issued? 40 | table_info: | 41 | CREATE TABLE "Corporate Charters" ( 42 | "File" TEXT, 43 | "Link to Document" TEXT, 44 | "FILED Date" TEXT, 45 | "FILED Time" TEXT, 46 | "SR" REAL, 47 | "FileNumber" REAL, 48 | "Corporation Name" TEXT, 49 | "Registered Address" TEXT, 50 | "Shares of Common Stock" INTEGER, 51 | "Shares of Preferred Stock" INTEGER 52 | ) 53 | sql_query: SELECT AVG("Shares of Common Stock") FROM "Corporate Charters" 54 | sql_result: "[(44,970,044,)]" 55 | answer: 44,970,044 shares of common stock were issued on average. 56 | query_explanation: "Calculate the average of the 'Shares of Common Stock' column for all corporations." 
57 | 58 | - question: Which company issued the most common stock? 59 | table_info: | 60 | CREATE TABLE "Corporate Charters" ( 61 | "File" TEXT, 62 | "Link to Document" TEXT, 63 | "FILED Date" TEXT, 64 | "FILED Time" TEXT, 65 | "SR" REAL, 66 | "FileNumber" REAL, 67 | "Corporation Name" TEXT, 68 | "Registered Address" TEXT, 69 | "Shares of Common Stock" INTEGER, 70 | "Shares of Preferred Stock" INTEGER 71 | ) 72 | sql_query: SELECT "Corporation Name", "Shares of Common Stock" FROM "Corporate Charters" ORDER BY "Shares of Common Stock" DESC LIMIT 1; 73 | sql_result: "[('Clearstory Data Inc.', 150000000)]" 74 | answer: Clearstory Data Inc. issued the most common stock with 150,000,000 shares. 75 | query_explanation: "Identify the corporation with the highest value in the 'Shares of Common Stock' column." 76 | 77 | - question: What are the top 7 companies that issued the most common stock? 78 | table_info: | 79 | CREATE TABLE "Corporate Charters" ( 80 | "File" TEXT, 81 | "Link to Document" TEXT, 82 | "FILED Date" TEXT, 83 | "FILED Time" TEXT, 84 | "SR" REAL, 85 | "FileNumber" REAL, 86 | "Corporation Name" TEXT, 87 | "Registered Address" TEXT, 88 | "Shares of Common Stock" INTEGER, 89 | "Shares of Preferred Stock" INTEGER 90 | ) 91 | sql_query: SELECT "Corporation Name", "Shares of Common Stock" FROM "Corporate Charters" ORDER BY "Shares of Common Stock" DESC LIMIT 7; 92 | sql_result: "[('Clearstory Data Inc.', 150000000), ('Cardiva Medical, Inc.', 112000000), ('Avi Networks, Inc.', 110000000), ('Bugcrowd Inc.', 108787009), ('Aisera, Inc.', 90000000), ('AVEGANT CORP', 80000000), ('CHECKR, INC.', 68000000)]" 93 | answer: The top 7 companies that issued the most common stock are Clearstory Data Inc., Cardiva Medical, Inc., Avi Networks, Inc., Bugcrowd Inc., Aisera, Inc., AVEGANT CORP, and CHECKR, INC. 94 | query_explanation: "List the top 7 corporations based on the values in the 'Shares of Common Stock' column." 95 | 96 | - question: When does the Medcore contract expire? 97 | table_info: | 98 | CREATE TABLE "SaaS Contracts" ( 99 | "File Name" TEXT, 100 | "Customer Number" TEXT, 101 | "Client Name" TEXT, 102 | "Effective Date" TEXT, 103 | "Term Expiry" TEXT, 104 | "Payment Terms" TEXT 105 | ) 106 | sql_query: SELECT "Term Expiry" FROM "SaaS contracts" WHERE LOWER("Client Name") LIKE "%medcore%"; 107 | sql_result: "[('2021-01-01 00:00:00',)]" 108 | answer: The Medcore contract expires on January 1, 2021 109 | query_explanation: "Retrieve the 'Term Expiry' date for the client with a name similar to 'Medcore'." 110 | 111 | - question: What were the total midmarket gross sales for Mexico in 2014? 112 | table_info: | 113 | CREATE TABLE "Financial Data" ( 114 | "Segment" TEXT, 115 | "Country" TEXT, 116 | "Product" TEXT, 117 | "Discount Band" TEXT, 118 | "Units Sold" REAL, 119 | "Manufacturing Price" INTEGER, 120 | "Sale Price" INTEGER, 121 | "Gross Sales" REAL, 122 | "Discounts" REAL, 123 | " Sales" REAL, 124 | "COGS" REAL, 125 | "Profit" REAL, 126 | "Date" TIMESTAMP, 127 | "Month Number" INTEGER, 128 | "Month Name" TEXT, 129 | "Year" INTEGER 130 | ) 131 | sql_query: SELECT SUM("Gross Sales") FROM "Financial Data" WHERE LOWER("Segment") LIKE("%midmarket%") AND LOWER("Country") LIKE "%mexico%" AND Year = 2014; 132 | sql_result: "[(451890.0,)]" 133 | answer: The total midmarket gross sales for Mexico in 2014 were 451,890. 134 | query_explanation: "Sum the 'Gross Sales' for entries in the 'Midmarket' segment in Mexico for the year 2014." 135 | 136 | - question: What can I ask about these documents? 
137 | table_info: | 138 | CREATE TABLE "Financial Data" ( 139 | "Segment" TEXT, 140 | "Country" TEXT, 141 | "Product" TEXT, 142 | "Discount Band" TEXT, 143 | "Units Sold" REAL, 144 | "Manufacturing Price" INTEGER, 145 | "Sale Price" INTEGER, 146 | "Gross Sales" REAL, 147 | "Discounts" REAL, 148 | "Sales" REAL, 149 | "COGS" REAL, 150 | "Profit" REAL, 151 | "Date" TIMESTAMP, 152 | "Month Number" INTEGER, 153 | "Month Name" TEXT, 154 | "Year" INTEGER 155 | ) 156 | sql_query: SELECT * FROM "Financial Data" LIMIT 1; 157 | sql_result: "[('Government', 'Canada', 'Carretera', None, 1618.5, 3, 20, 32370.0, 0.0, 32370.0, 16185.0, 16185.0, '2014-01-01 00:00:00', 1, 'January', 2014)]" 158 | answer: You can ask various questions about the financial data related to these documents, for example questions related to the segment, country, country etc of various product sales. 159 | query_explanation: "Provide a sample entry from the 'Financial Data' report." 160 | 161 | - question: How many customers are not from Brazil? 162 | table_info: | 163 | CREATE TABLE "Customer" ( 164 | "CustomerId" INTEGER NOT NULL, 165 | "FirstName" NVARCHAR(40) NOT NULL, 166 | "LastName" NVARCHAR(20) NOT NULL, 167 | "Company" NVARCHAR(80), 168 | "Address" NVARCHAR(70), 169 | "City" NVARCHAR(40), 170 | "State" NVARCHAR(40), 171 | "Country" NVARCHAR(40), 172 | "PostalCode" NVARCHAR(10), 173 | "Phone" NVARCHAR(24), 174 | "Fax" NVARCHAR(24), 175 | "Email" NVARCHAR(60) NOT NULL, 176 | "SupportRepId" INTEGER, 177 | PRIMARY KEY ("CustomerId"), 178 | FOREIGN KEY("SupportRepId") REFERENCES "Employee" ("EmployeeId") 179 | ) 180 | sql_query: SELECT COUNT(*) FROM "Customer" WHERE NOT LOWER("Country") LIKE "%brazil%"; 181 | sql_result: "[(54,)]" 182 | answer: 54 customers are not from Brazil. 183 | query_explanation: "Count the number of entries in the 'Customer' report where the 'Country' is not Brazil." 184 | 185 | - question: list all the genres that start with 'r' 186 | table_info: | 187 | CREATE TABLE "Genre" ( 188 | "GenreId" INTEGER NOT NULL, 189 | "Name" NVARCHAR(120), 190 | PRIMARY KEY ("GenreId") 191 | ) 192 | sql_query: SELECT "Name" FROM "Genre" WHERE LOWER("Name") LIKE 'r%'; 193 | sql_result: "[('Rock',), ('Rock and Roll',), ('Reggae',), ('R&B/Soul',)]" 194 | answer: The genres that start with 'r' are Rock, Rock and Roll, Reggae and R&B/Soul. 195 | query_explanation: "List all entries from the 'Genre' report where the 'Name' starts with the letter 'r'." 196 | 197 | - question: Which client has the highest liability cap? 
198 | table_info: | 199 | CREATE TABLE "Report_Services_preview.xlsx" ( 200 | "File" TEXT, 201 | "Link to Document" TEXT, 202 | "Workers Compensation Insurance" TEXT, 203 | "Limit" TEXT, 204 | "Commercial General Liability Insurance" TEXT, 205 | "Technology Professional Liability Errors Omissions Policy" TEXT, 206 | "Excess Liability Umbrella Coverage" TEXT, 207 | "Services Agreement Date" TEXT, 208 | "Completion of the Services by Company Date" TEXT, 209 | "Charge" TEXT, 210 | "Initial Payment" TEXT, 211 | "Continue in Force And Effect Date" TEXT, 212 | "Non-refundable" TEXT, 213 | "Company" TEXT, 214 | "Client" TEXT, 215 | "Client Address" TEXT, 216 | "Magicsoft" TEXT, 217 | "Except" TEXT, 218 | "A Party" TEXT, 219 | "A Daily Basis" TEXT, 220 | "The Initial Understanding" TEXT, 221 | "Signature Page" TEXT 222 | ) 223 | sql_query: SELECT "Client", "Excess Liability Umbrella Coverage" FROM "Report_Services_preview.xlsx" ORDER BY "Excess Liability Umbrella Coverage" DESC LIMIT 1 224 | sql_result: "[('Inity, Inc.', 'Excess Liability/Umbrella coverage with a limit of no less than $9,000,000 per occurrence and in the aggregate (such limit may be achieved through increase of limits in underlying policies to reach the level of coverage shown here). This policy shall name Client as an additional insured with...')]" 225 | answer: The client with the highest liability cap is Inity, Inc., with an excess liability/umbrella coverage limit of no less than $9,000,000 per occurrence and in the aggregate. 226 | query_explanation: "Identify the client with the highest value in the 'Excess Liability Umbrella Coverage' column." 227 | 228 | - question: Which client has the lowest initial payment? 229 | table_info: | 230 | CREATE TABLE "Report_Services_preview.xlsx" ( 231 | "File" TEXT, 232 | "Link to Document" TEXT, 233 | "Workers Compensation Insurance" TEXT, 234 | "Limit" TEXT, 235 | "Commercial General Liability Insurance" TEXT, 236 | "Technology Professional Liability Errors Omissions Policy" TEXT, 237 | "Excess Liability Umbrella Coverage" TEXT, 238 | "Services Agreement Date" TEXT, 239 | "Completion of the Services by Company Date" TEXT, 240 | "Charge" TEXT, 241 | "Initial Payment" TEXT, 242 | "Continue in Force And Effect Date" TEXT, 243 | "Non-refundable" TEXT, 244 | "Company" TEXT, 245 | "Client" TEXT, 246 | "Client Address" TEXT, 247 | "Magicsoft" TEXT, 248 | "Except" TEXT, 249 | "A Party" TEXT, 250 | "A Daily Basis" TEXT, 251 | "The Initial Understanding" TEXT, 252 | "Signature Page" TEXT 253 | ) 254 | sql_query: SELECT "Client", "Initial Payment" FROM "Report_Services_preview.xlsx" ORDER BY "Initial Payment" ASC LIMIT 1 255 | sql_result: "[('Kazuya, Inc.', 'Eighty Five Thousand Dollars ($85,000)')]" 256 | answer: The client with the lowest initial payment is Kazuya, Inc. with an initial payment of Eighty Five Thousand Dollars ($85,000). 257 | query_explanation: "Identify the client with the lowest value in the 'Initial Payment' column." 258 | 259 | - question: Which client's contract expires the earliest? 
260 | table_info: | 261 | CREATE TABLE "Report_Services_preview.xlsx" ( 262 | "File" TEXT, 263 | "Link to Document" TEXT, 264 | "Workers Compensation Insurance" TEXT, 265 | "Limit" TEXT, 266 | "Commercial General Liability Insurance" TEXT, 267 | "Technology Professional Liability Errors Omissions Policy" TEXT, 268 | "Excess Liability Umbrella Coverage" TEXT, 269 | "Services Agreement Date" TEXT, 270 | "Completion of the Services by Company Date" TEXT, 271 | "Charge" TEXT, 272 | "Initial Payment" TEXT, 273 | "Continue in Force And Effect Date" TEXT, 274 | "Non-refundable" TEXT, 275 | "Company" TEXT, 276 | "Client" TEXT, 277 | "Client Address" TEXT, 278 | "Magicsoft" TEXT, 279 | "Except" TEXT, 280 | "A Party" TEXT, 281 | "A Daily Basis" TEXT, 282 | "The Initial Understanding" TEXT, 283 | "Signature Page" TEXT 284 | ) 285 | sql_query: SELECT "Client", "Completion of the Services by Company Date" FROM "Report_Services_preview.xlsx" ORDER BY "Completion of the Services by Company Date" LIMIT 1 286 | sql_result: "[('Bioplex, Inc.', 'February 15, 2022')]" 287 | answer: The contract for Bioplex, Inc. expires the earliest, on February 15, 2022. 288 | query_explanation: "Identify the client with the earliest date in the 'Completion of the Services by Company Date' column." 289 | 290 | - question: What is the highest Workers Comp Insurance? 291 | table_info: | 292 | CREATE TABLE "Service Agreements Summary" ( 293 | "File" TEXT, 294 | "Link to Document" TEXT, 295 | "Workers Compensation Insurance" TEXT, 296 | "Limit" TEXT, 297 | "Commercial General Liability Insurance" TEXT, 298 | "Technology Professional Liability Errors Omissions Policy" TEXT, 299 | "Excess Liability Umbrella Coverage" TEXT, 300 | "Services Agreement Date" TEXT, 301 | "Completion of the Services by Company Date" TEXT, 302 | "Charge" TEXT, 303 | "Initial Payment" TEXT, 304 | "Continue in Force And Effect Date" TEXT, 305 | "Non-refundable" TEXT, 306 | "Company" TEXT, 307 | "Client" TEXT, 308 | "Client Address" TEXT, 309 | "Magicsoft" TEXT, 310 | "Except" TEXT, 311 | "A Party" TEXT, 312 | "A Daily Basis" TEXT, 313 | "The Initial Understanding" TEXT, 314 | "Signature Page" TEXT 315 | ) 316 | sql_query: SELECT "Workers Compensation Insurance" FROM "Service Agreements Summary" ORDER BY "Workers Compensation Insurance" DESC LIMIT 1 317 | sql_result: "[('$5,500,000',)]" 318 | answer: The highest Workers Comp Insurance limit is $5,500,000. 319 | query_explanation: "Identify the highest value in the 'Workers Compensation Insurance' column." 320 | 321 | - question: What are typical working hours? 322 | table_info: | 323 | CREATE TABLE "Service Agreements Summary" ( 324 | "File" TEXT, 325 | "Link to Document" TEXT, 326 | "Workers Compensation Insurance" TEXT, 327 | "Limit" TEXT, 328 | "Commercial General Liability Insurance" TEXT, 329 | "Technology Professional Liability Errors Omissions Policy" TEXT, 330 | "Excess Liability Umbrella Coverage" TEXT, 331 | "Services Agreement Date" TEXT, 332 | "Completion of the Services by Company Date" TEXT, 333 | "Charge" TEXT, 334 | "Initial Payment" TEXT, 335 | "Continue in Force And Effect Date" TEXT, 336 | "Non-refundable" TEXT, 337 | "Company" TEXT, 338 | "Client" TEXT, 339 | "Client Address" TEXT, 340 | "Magicsoft" TEXT, 341 | "Except" TEXT, 342 | "A Party" TEXT, 343 | "A Daily Basis" TEXT, 344 | "The Initial Understanding" TEXT, 345 | "Signature Page" TEXT 346 | ) 347 | sql_query: SELECT "A Daily Basis" FROM "Service Agreements Summary" LIMIT 5 348 | sql_result: "[('Onsite Services. 
2.1 Onsite visits will be charged on a daily basis (minimum 8 hours). 2.2 Time and expenses will be charged based on actuals unless otherwise described in an Order Form or accompanying SOW. 2.3 All work will be executed during regular working hours Monday-Friday 0800-1900. For...',), ('Onsite Services. 2.1 Onsite visits will be charged on a daily basis (minimum 8 hours). 2.2 Time and expenses will be charged based on actuals unless otherwise described in an Order Form or accompanying SOW. 2.3 All work will be executed during regular working hours Monday-Friday 0800-1900. For...',), ('Onsite Services. 2.1 Onsite visits will be charged on a daily basis (minimum 8 hours). 2.2 Time and expenses will be charged based on actuals unless otherwise described in an Order Form or accompanying SOW. 2.3 All work will be executed during regular working hours Monday-Friday 0800-1900. For...',), ('Onsite Services. 2.1 Onsite visits will be charged on a daily basis (minimum 8 hours). 2.2 Time and expenses will be charged based on actuals unless otherwise described in an Order Form or accompanying SOW. 2.3 All work will be executed during regular working hours Monday-Friday 0800-1900. For...',), ('Onsite Services. 2.1 Onsite visits will be charged on a daily basis (minimum 8 hours). 2.2 Time and expenses will be charged based on actuals unless otherwise described in an Order Form or accompanying SOW. 2.3 All work will be executed during regular working hours Monday-Friday 0800-1900. For...',)]" 349 | answer: "Regular working hours are Monday-Friday 0800-1900" 350 | query_explanation: "Extract the typical working hours mentioned in the 'A Daily Basis' column." 351 | 352 | - question: What is the total number of clients with excess liability coverage? 353 | table_info: | 354 | CREATE TABLE "Report_Services_preview.xlsx" ( 355 | "File" TEXT, 356 | "Link to Document" TEXT, 357 | "Workers Compensation Insurance" TEXT, 358 | "Limit" TEXT, 359 | "Commercial General Liability Insurance" TEXT, 360 | "Technology Professional Liability Errors Omissions Policy" TEXT, 361 | "Excess Liability Umbrella Coverage" TEXT, 362 | "Services Agreement Date" TEXT, 363 | "Completion of the Services by Company Date" TEXT, 364 | "Charge" TEXT, 365 | "Initial Payment" TEXT, 366 | "Continue in Force And Effect Date" TEXT, 367 | "Non-refundable" TEXT, 368 | "Company" TEXT, 369 | "Client" TEXT, 370 | "Client Address" TEXT, 371 | "Magicsoft" TEXT, 372 | "Except" TEXT, 373 | "A Party" TEXT, 374 | "A Daily Basis" TEXT, 375 | "The Initial Understanding" TEXT, 376 | "Signature Page" TEXT 377 | ) 378 | sql_query: SELECT COUNT("Client") FROM "Report_Services_preview.xlsx" WHERE "Excess Liability Umbrella Coverage" IS NOT NULL AND TRIM("Excess Liability Umbrella Coverage") <> ''; 379 | sql_result: "[(25,)]" 380 | answer: "25 clients have Excess Liability Umbrella Coverage" 381 | query_explanation: "Count the number of clients that have excess liability umbrella coverage" -------------------------------------------------------------------------------- /notebooks/eval-sec-10-q.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# SEC 10-Q Eval\n", 8 | "\n", 9 | "Evaluating Docugami KG-RAG against OpenAI Assistants Retrieval for this dataset: https://github.com/docugami/KG-RAG-datasets/tree/main/sec-10-q" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": {}, 15 | "source": [ 16 | "## Set up Eval" 17 
| ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": null, 22 | "metadata": {}, 23 | "outputs": [], 24 | "source": [ 25 | "%load_ext autoreload\n", 26 | "%autoreload 2" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "!rm -rf temp\n", 36 | "!git clone https://github.com/docugami/KG-RAG-datasets.git temp" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": 3, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "import os\n", 46 | "from pathlib import Path\n", 47 | "from datetime import datetime\n", 48 | "\n", 49 | "# Important: Create your OpenAI assistant via https://platform.openai.com/playground\n", 50 | "# and put the assistant ID here. Make sure you upload the identical set of\n", 51 | "# files listed below (these files will be uploaded automatically to Docugami)\n", 52 | "OPENAI_ASSISTANT_ID = \"asst_qY1M0SeFYlmqkEZsMVZX2VAK\"\n", 53 | "\n", 54 | "DOCSET_NAME = \"SEC 10Q Filings\"\n", 55 | "EVAL_NAME = DOCSET_NAME + \" \" + datetime.now().strftime(\"%Y-%m-%d\")\n", 56 | "FILES_DIR = Path(os.getcwd()) / \"temp/sec-10-q/data/v1/docs\"\n", 57 | "FILE_NAMES = [\n", 58 | " \"2022 Q3 AAPL.pdf\",\n", 59 | " \"2022 Q3 AMZN.pdf\",\n", 60 | " \"2022 Q3 INTC.pdf\",\n", 61 | " \"2022 Q3 MSFT.pdf\",\n", 62 | " \"2022 Q3 NVDA.pdf\",\n", 63 | " \"2023 Q1 AAPL.pdf\",\n", 64 | " \"2023 Q1 AMZN.pdf\",\n", 65 | " \"2023 Q1 INTC.pdf\",\n", 66 | " \"2023 Q1 MSFT.pdf\",\n", 67 | " \"2023 Q1 NVDA.pdf\",\n", 68 | " \"2023 Q2 AAPL.pdf\",\n", 69 | " \"2023 Q2 AMZN.pdf\",\n", 70 | " \"2023 Q2 INTC.pdf\",\n", 71 | " \"2023 Q2 MSFT.pdf\",\n", 72 | " \"2023 Q2 NVDA.pdf\",\n", 73 | " \"2023 Q3 AAPL.pdf\",\n", 74 | " \"2023 Q3 AMZN.pdf\",\n", 75 | " \"2023 Q3 INTC.pdf\",\n", 76 | " \"2023 Q3 MSFT.pdf\",\n", 77 | " \"2023 Q3 NVDA.pdf\",\n", 78 | "]\n", 79 | "\n", 80 | "# Using mini set to save cost while developing, use full set for actual runs (~$300 per run in OpenAI costs per run)\n", 81 | "GROUND_TRUTH_CSV = Path(os.getcwd()) / \"temp/sec-10-q/data/v1/qna_data_mini.csv\"\n", 82 | "\n", 83 | "# We will run each experiment multiple times and average,\n", 84 | "# since results vary slightly over runs\n", 85 | "PER_EXPERIMENT_RUN_COUNT = 5\n", 86 | "\n", 87 | "# Note: Please specify ~6 (or more!) 
similar files to process together as a document set\n", 88 | "# This is currently a requirement for Docugami to automatically detect motifs\n", 89 | "# across the document set to generate a semantic XML Knowledge Graph.\n", 90 | "assert len(FILE_NAMES) >= 6, \"Please provide at least 6 files\"" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": 10, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [ 99 | "import pandas as pd\n", 100 | "from langsmith import Client\n", 101 | "\n", 102 | "# Read\n", 103 | "df = pd.read_csv(GROUND_TRUTH_CSV)\n", 104 | "\n", 105 | "# Dataset\n", 106 | "client = Client()\n", 107 | "dataset_name = EVAL_NAME\n", 108 | "existing_datasets = list(client.list_datasets(dataset_name=dataset_name))\n", 109 | "if existing_datasets:\n", 110 | " # read existing dataset\n", 111 | " dataset = client.read_dataset(dataset_name=dataset_name)\n", 112 | "else:\n", 113 | " dataset = client.create_dataset(dataset_name=dataset_name)\n", 114 | " # Populate dataset\n", 115 | " for _, row in df.iterrows():\n", 116 | " q = row[\"Question\"]\n", 117 | " a = row[\"Answer\"]\n", 118 | " client.create_example(\n", 119 | " inputs={\"question\": q}, outputs={\"answer\": a}, dataset_id=dataset.id\n", 120 | " )" 121 | ] 122 | }, 123 | { 124 | "cell_type": "markdown", 125 | "metadata": {}, 126 | "source": [ 127 | "## Set up Docugami KG-RAG" 128 | ] 129 | }, 130 | { 131 | "cell_type": "markdown", 132 | "metadata": {}, 133 | "source": [ 134 | "#### Upload files to Docugami" 135 | ] 136 | }, 137 | { 138 | "cell_type": "code", 139 | "execution_count": null, 140 | "metadata": {}, 141 | "outputs": [], 142 | "source": [ 143 | "from docugami import Docugami\n", 144 | "from docugami.lib.upload import upload_to_named_docset, wait_for_dgml\n", 145 | "\n", 146 | "dg_client = Docugami()\n", 147 | "file_paths = [FILES_DIR / file_name for file_name in FILE_NAMES]\n", 148 | "\n", 149 | "# Files will not be re-uploaded if they were previously uploaded (based on name)\n", 150 | "dg_docs = upload_to_named_docset(dg_client, file_paths, DOCSET_NAME)\n", 151 | "\n", 152 | "docset_id = \"\"\n", 153 | "docset_name = \"\"\n", 154 | "for doc in dg_docs:\n", 155 | " if not docset_id:\n", 156 | " docset_id = doc.docset.id\n", 157 | " else:\n", 158 | " # all docs must be in the same docset\n", 159 | " assert docset_id == doc.docset.id\n", 160 | "\n", 161 | " if not docset_name:\n", 162 | " docset_name = dg_client.docsets.retrieve(doc.docset.id).name" 163 | ] 164 | }, 165 | { 166 | "cell_type": "code", 167 | "execution_count": null, 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "# Wait for files to finish processing (OCR, and zero-shot creation of XML knowledge graph)\n", 172 | "\n", 173 | "# Note: This can take some time on the free docugami tier (up to ~20 mins). 
Please contact us for faster paid plans.\n", 174 | "wait_for_dgml(dg_client, dg_docs)" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": null, 180 | "metadata": {}, 181 | "outputs": [], 182 | "source": [ 183 | "# Run indexing\n", 184 | "from docugami_kg_rag.indexing import index_docset\n", 185 | "\n", 186 | "assert docset_id\n", 187 | "assert docset_name\n", 188 | "\n", 189 | "# Note: This can take some time since it is embedding and creating summaries for all the docs and chunks\n", 190 | "index_docset(docset_id=docset_id, name=docset_name)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "markdown", 195 | "metadata": {}, 196 | "source": [ 197 | "#### Create Docugami Agent" 198 | ] 199 | }, 200 | { 201 | "cell_type": "code", 202 | "execution_count": 6, 203 | "metadata": {}, 204 | "outputs": [ 205 | { 206 | "name": "stderr", 207 | "output_type": "stream", 208 | "text": [ 209 | "/root/Source/github/langchain-template-docugami-kg-rag/.venv/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", 210 | " from .autonotebook import tqdm as notebook_tqdm\n" 211 | ] 212 | }, 213 | { 214 | "name": "stdout", 215 | "output_type": "stream", 216 | "text": [ 217 | "Loading default rankgpt3 model for language en\n", 218 | "Loading RankGPTRanker model gpt-3.5-turbo\n" 219 | ] 220 | } 221 | ], 222 | "source": [ 223 | "from docugami_kg_rag.agent import build_agent_runnable\n", 224 | "from langchain_core.messages import HumanMessage\n", 225 | "\n", 226 | "def predict_docugami_agent(input: dict, config: dict = None) -> str:\n", 227 | " docugami_agent = build_agent_runnable()\n", 228 | " question = input[\"question\"]\n", 229 | " return docugami_agent.invoke(\n", 230 | " {\n", 231 | " \"messages\": [HumanMessage(content=question)],\n", 232 | " }\n", 233 | " )" 234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 8, 239 | "metadata": {}, 240 | "outputs": [ 241 | { 242 | "name": "stderr", 243 | "output_type": "stream", 244 | "text": [ 245 | "Parent run c43a0a12-b825-4ed0-bd80-cad6a4a57abc not found for run f4b7d6cd-06f4-43b1-a6b3-5fc8157708d3. Treating as a root run.\n" 246 | ] 247 | }, 248 | { 249 | "data": { 250 | "text/plain": [ 251 | "'The information provided does not specify the exact amount Microsoft spent on operating expenses (opex) for the latest quarter ended September 30, 2023, but it mentions that the operating expenses increased by $119 million, marking a 2% increase from the previous period. To find the exact amount spent on opex, one would need to look at the specific figures from the previous period and apply the mentioned increase.'" 252 | ] 253 | }, 254 | "execution_count": 8, 255 | "metadata": {}, 256 | "output_type": "execute_result" 257 | } 258 | ], 259 | "source": [ 260 | "# Test the agent to make sure it is working\n", 261 | "predict_docugami_agent({\"question\": \"How much did Microsoft spend for opex in the latest quarter?\"})" 262 | ] 263 | }, 264 | { 265 | "cell_type": "markdown", 266 | "metadata": {}, 267 | "source": [ 268 | "## Set up OpenAI Assistants Retrieval" 269 | ] 270 | }, 271 | { 272 | "cell_type": "markdown", 273 | "metadata": {}, 274 | "source": [ 275 | "### Create OpenAI Agent\n", 276 | "\n", 277 | "Please go to https://platform.openai.com/playground and create your agent. 
" 278 | ] 279 | }, 280 | { 281 | "cell_type": "code", 282 | "execution_count": 4, 283 | "metadata": {}, 284 | "outputs": [], 285 | "source": [ 286 | "from langchain.agents.openai_assistant import OpenAIAssistantRunnable\n", 287 | "\n", 288 | "def predict_openai_agent(input: dict, config: dict = None) -> str:\n", 289 | " openai_agent = OpenAIAssistantRunnable(assistant_id=OPENAI_ASSISTANT_ID, as_agent=True).with_config(config)\n", 290 | " question = input[\"question\"]\n", 291 | " result = openai_agent.invoke({\"content\": question})\n", 292 | "\n", 293 | " return result.return_values[\"output\"]" 294 | ] 295 | }, 296 | { 297 | "cell_type": "code", 298 | "execution_count": 5, 299 | "metadata": {}, 300 | "outputs": [ 301 | { 302 | "data": { 303 | "text/plain": [ 304 | "\"Microsoft's operating expenses for the latest quarter, which ended on September 30, 2023, increased by $168 million or 1% compared to the previous year.\\n\\nSOURCE(S): 2023 Q3 MSFT.pdf \"" 305 | ] 306 | }, 307 | "execution_count": 5, 308 | "metadata": {}, 309 | "output_type": "execute_result" 310 | } 311 | ], 312 | "source": [ 313 | "# Test the agent to make sure it is working\n", 314 | "predict_openai_agent({\"question\": \"How much did Microsoft spend for opex in the latest quarter?\"})" 315 | ] 316 | }, 317 | { 318 | "cell_type": "markdown", 319 | "metadata": {}, 320 | "source": [ 321 | "## Run Evals\n" 322 | ] 323 | }, 324 | { 325 | "cell_type": "code", 326 | "execution_count": 11, 327 | "metadata": {}, 328 | "outputs": [ 329 | { 330 | "name": "stdout", 331 | "output_type": "stream", 332 | "text": [ 333 | "View the evaluation results for project 'docugami_kg_rag_zero_shot_1def70be-e363-4459-bf3d-aaf6194e8bd0' at:\n", 334 | "https://smith.langchain.com/o/530c4d06-5640-4c0f-94fe-0be7b769531f/datasets/a5db8a49-d0eb-4150-83e4-68bf08ad8ebf/compare?selectedSessions=84e6f6e4-6ef8-46e9-8c24-61b9941fa65c\n", 335 | "\n", 336 | "View all tests for Dataset SEC 10Q Filings 2024-05-03 at:\n", 337 | "https://smith.langchain.com/o/530c4d06-5640-4c0f-94fe-0be7b769531f/datasets/a5db8a49-d0eb-4150-83e4-68bf08ad8ebf\n", 338 | "[> ] 0/9" 339 | ] 340 | }, 341 | { 342 | "name": "stderr", 343 | "output_type": "stream", 344 | "text": [ 345 | "Parent run 7d23199c-b4a2-4aa5-b10d-4981e912bf04 not found for run 1a663ee0-7a83-427c-b4ca-e10e1d234e6a. Treating as a root run.\n", 346 | "Parent run ff0b5085-7a78-47f6-940c-ff3b56f17f43 not found for run 4b20bd8c-eb81-4ef0-a120-8e4528f71df2. Treating as a root run.\n" 347 | ] 348 | }, 349 | { 350 | "name": "stdout", 351 | "output_type": "stream", 352 | "text": [ 353 | "[----------> ] 2/9" 354 | ] 355 | }, 356 | { 357 | "name": "stderr", 358 | "output_type": "stream", 359 | "text": [ 360 | "Parent run 188a3636-5427-465e-ab1d-b853199e483d not found for run c9c7073f-fcb2-477f-8b8e-ddc5717608bc. Treating as a root run.\n", 361 | "Parent run 670fec85-bd3f-45d0-8d19-813bc9018eda not found for run f23a87b5-8369-4ff1-9538-953d5bb5f44b. Treating as a root run.\n" 362 | ] 363 | }, 364 | { 365 | "name": "stdout", 366 | "output_type": "stream", 367 | "text": [ 368 | "[----------------> ] 3/9" 369 | ] 370 | }, 371 | { 372 | "name": "stderr", 373 | "output_type": "stream", 374 | "text": [ 375 | "Parent run 0f229247-54c7-47e2-bf03-7892064ba501 not found for run 6a175ad3-b0a0-40ac-b8d5-dab469bd9e64. 
Treating as a root run.\n" 376 | ] 377 | }, 378 | { 379 | "name": "stdout", 380 | "output_type": "stream", 381 | "text": [ 382 | "[---------------------> ] 4/9" 383 | ] 384 | }, 385 | { 386 | "name": "stderr", 387 | "output_type": "stream", 388 | "text": [ 389 | "Parent run a729d3af-8ac2-4e6f-bef7-17a5da57688c not found for run 3f2974b3-f1dd-4c9f-9813-d06c59fc7803. Treating as a root run.\n" 390 | ] 391 | }, 392 | { 393 | "name": "stdout", 394 | "output_type": "stream", 395 | "text": [ 396 | "[---------------------------> ] 5/9" 397 | ] 398 | }, 399 | { 400 | "name": "stderr", 401 | "output_type": "stream", 402 | "text": [ 403 | "Parent run db8dc473-881f-407d-85c7-bd34fe64dd68 not found for run 476a438d-35ff-439a-a5f7-6b2b5324e279. Treating as a root run.\n" 404 | ] 405 | }, 406 | { 407 | "name": "stdout", 408 | "output_type": "stream", 409 | "text": [ 410 | "[--------------------------------> ] 6/9" 411 | ] 412 | }, 413 | { 414 | "name": "stderr", 415 | "output_type": "stream", 416 | "text": [ 417 | "Parent run 73899a93-d0de-45f0-9458-9fa6ef4acaeb not found for run 97164282-e83b-4ed5-88a4-76d128ef54cd. Treating as a root run.\n" 418 | ] 419 | }, 420 | { 421 | "name": "stdout", 422 | "output_type": "stream", 423 | "text": [ 424 | "[-------------------------------------------> ] 8/9" 425 | ] 426 | }, 427 | { 428 | "name": "stderr", 429 | "output_type": "stream", 430 | "text": [ 431 | "Parent run e116b345-5838-4118-b0df-1caeee971c86 not found for run 98168214-859a-4e11-8a87-82d7e6ce0894. Treating as a root run.\n" 432 | ] 433 | }, 434 | { 435 | "name": "stdout", 436 | "output_type": "stream", 437 | "text": [ 438 | "[------------------------------------------------->] 9/9" 439 | ] 440 | }, 441 | { 442 | "data": { 443 | "text/html": [ 444 | "

Experiment Results:

" 445 | ], 446 | "text/plain": [ 447 | "" 448 | ] 449 | }, 450 | "metadata": {}, 451 | "output_type": "display_data" 452 | }, 453 | { 454 | "data": { 455 | "text/html": [ 456 | "
\n", 457 | "\n", 470 | "\n", 471 | " \n", 472 | " \n", 473 | " \n", 474 | " \n", 475 | " \n", 476 | " \n", 477 | " \n", 478 | " \n", 479 | " \n", 480 | " \n", 481 | " \n", 482 | " \n", 483 | " \n", 484 | " \n", 485 | " \n", 486 | " \n", 487 | " \n", 488 | " \n", 489 | " \n", 490 | " \n", 491 | " \n", 492 | " \n", 493 | " \n", 494 | " \n", 495 | " \n", 496 | " \n", 497 | " \n", 498 | " \n", 499 | " \n", 500 | " \n", 501 | " \n", 502 | " \n", 503 | " \n", 504 | " \n", 505 | " \n", 506 | " \n", 507 | " \n", 508 | " \n", 509 | " \n", 510 | " \n", 511 | " \n", 512 | " \n", 513 | " \n", 514 | " \n", 515 | " \n", 516 | " \n", 517 | " \n", 518 | " \n", 519 | " \n", 520 | " \n", 521 | " \n", 522 | " \n", 523 | " \n", 524 | " \n", 525 | " \n", 526 | " \n", 527 | " \n", 528 | " \n", 529 | " \n", 530 | " \n", 531 | " \n", 532 | " \n", 533 | " \n", 534 | " \n", 535 | " \n", 536 | " \n", 537 | " \n", 538 | " \n", 539 | " \n", 540 | " \n", 541 | " \n", 542 | " \n", 543 | " \n", 544 | " \n", 545 | " \n", 546 | " \n", 547 | " \n", 548 | " \n", 549 | " \n", 550 | " \n", 551 | " \n", 552 | " \n", 553 | " \n", 554 | " \n", 555 | " \n", 556 | " \n", 557 | " \n", 558 | " \n", 559 | "
feedback.correctnesserrorexecution_timerun_id
count9.00000009.0000009
uniqueNaN0NaN9
topNaNNaNNaN2c8c1f77-99cf-4863-8c89-646ad3ec96aa
freqNaNNaNNaN1
mean0.555556NaN13.302264NaN
std0.527046NaN4.305790NaN
min0.000000NaN9.893197NaN
25%0.000000NaN10.643519NaN
50%1.000000NaN11.694967NaN
75%1.000000NaN13.202559NaN
max1.000000NaN22.169920NaN
\n", 560 | "
" 561 | ], 562 | "text/plain": [ 563 | " feedback.correctness error execution_time \\\n", 564 | "count 9.000000 0 9.000000 \n", 565 | "unique NaN 0 NaN \n", 566 | "top NaN NaN NaN \n", 567 | "freq NaN NaN NaN \n", 568 | "mean 0.555556 NaN 13.302264 \n", 569 | "std 0.527046 NaN 4.305790 \n", 570 | "min 0.000000 NaN 9.893197 \n", 571 | "25% 0.000000 NaN 10.643519 \n", 572 | "50% 1.000000 NaN 11.694967 \n", 573 | "75% 1.000000 NaN 13.202559 \n", 574 | "max 1.000000 NaN 22.169920 \n", 575 | "\n", 576 | " run_id \n", 577 | "count 9 \n", 578 | "unique 9 \n", 579 | "top 2c8c1f77-99cf-4863-8c89-646ad3ec96aa \n", 580 | "freq 1 \n", 581 | "mean NaN \n", 582 | "std NaN \n", 583 | "min NaN \n", 584 | "25% NaN \n", 585 | "50% NaN \n", 586 | "75% NaN \n", 587 | "max NaN " 588 | ] 589 | }, 590 | "metadata": {}, 591 | "output_type": "display_data" 592 | }, 593 | { 594 | "name": "stdout", 595 | "output_type": "stream", 596 | "text": [ 597 | "View the evaluation results for project 'openai_assistant_retrieval_1def70be-e363-4459-bf3d-aaf6194e8bd0' at:\n", 598 | "https://smith.langchain.com/o/530c4d06-5640-4c0f-94fe-0be7b769531f/datasets/a5db8a49-d0eb-4150-83e4-68bf08ad8ebf/compare?selectedSessions=49dc2afa-de66-43e4-8a56-605989a9f5e0\n", 599 | "\n", 600 | "View all tests for Dataset SEC 10Q Filings 2024-05-03 at:\n", 601 | "https://smith.langchain.com/o/530c4d06-5640-4c0f-94fe-0be7b769531f/datasets/a5db8a49-d0eb-4150-83e4-68bf08ad8ebf\n", 602 | "[------------------------------------------------->] 9/9" 603 | ] 604 | }, 605 | { 606 | "data": { 607 | "text/html": [ 608 | "

Experiment Results:

" 609 | ], 610 | "text/plain": [ 611 | "" 612 | ] 613 | }, 614 | "metadata": {}, 615 | "output_type": "display_data" 616 | }, 617 | { 618 | "data": { 619 | "text/html": [ 620 | "
\n", 621 | "\n", 634 | "\n", 635 | " \n", 636 | " \n", 637 | " \n", 638 | " \n", 639 | " \n", 640 | " \n", 641 | " \n", 642 | " \n", 643 | " \n", 644 | " \n", 645 | " \n", 646 | " \n", 647 | " \n", 648 | " \n", 649 | " \n", 650 | " \n", 651 | " \n", 652 | " \n", 653 | " \n", 654 | " \n", 655 | " \n", 656 | " \n", 657 | " \n", 658 | " \n", 659 | " \n", 660 | " \n", 661 | " \n", 662 | " \n", 663 | " \n", 664 | " \n", 665 | " \n", 666 | " \n", 667 | " \n", 668 | " \n", 669 | " \n", 670 | " \n", 671 | " \n", 672 | " \n", 673 | " \n", 674 | " \n", 675 | " \n", 676 | " \n", 677 | " \n", 678 | " \n", 679 | " \n", 680 | " \n", 681 | " \n", 682 | " \n", 683 | " \n", 684 | " \n", 685 | " \n", 686 | " \n", 687 | " \n", 688 | " \n", 689 | " \n", 690 | " \n", 691 | " \n", 692 | " \n", 693 | " \n", 694 | " \n", 695 | " \n", 696 | " \n", 697 | " \n", 698 | " \n", 699 | " \n", 700 | " \n", 701 | " \n", 702 | " \n", 703 | " \n", 704 | " \n", 705 | " \n", 706 | " \n", 707 | " \n", 708 | " \n", 709 | " \n", 710 | " \n", 711 | " \n", 712 | " \n", 713 | " \n", 714 | " \n", 715 | " \n", 716 | " \n", 717 | " \n", 718 | " \n", 719 | " \n", 720 | " \n", 721 | " \n", 722 | " \n", 723 | "
feedback.correctnesserrorexecution_timerun_id
count9.00000009.0000009
uniqueNaN0NaN9
topNaNNaNNaN0eac9a43-0ddf-453b-b2e8-e6c4d6010c33
freqNaNNaNNaN1
mean0.888889NaN15.886570NaN
std0.333333NaN5.047527NaN
min0.000000NaN10.196605NaN
25%1.000000NaN11.405077NaN
50%1.000000NaN14.915465NaN
75%1.000000NaN18.377867NaN
max1.000000NaN25.690422NaN
\n", 724 | "
" 725 | ], 726 | "text/plain": [ 727 | " feedback.correctness error execution_time \\\n", 728 | "count 9.000000 0 9.000000 \n", 729 | "unique NaN 0 NaN \n", 730 | "top NaN NaN NaN \n", 731 | "freq NaN NaN NaN \n", 732 | "mean 0.888889 NaN 15.886570 \n", 733 | "std 0.333333 NaN 5.047527 \n", 734 | "min 0.000000 NaN 10.196605 \n", 735 | "25% 1.000000 NaN 11.405077 \n", 736 | "50% 1.000000 NaN 14.915465 \n", 737 | "75% 1.000000 NaN 18.377867 \n", 738 | "max 1.000000 NaN 25.690422 \n", 739 | "\n", 740 | " run_id \n", 741 | "count 9 \n", 742 | "unique 9 \n", 743 | "top 0eac9a43-0ddf-453b-b2e8-e6c4d6010c33 \n", 744 | "freq 1 \n", 745 | "mean NaN \n", 746 | "std NaN \n", 747 | "min NaN \n", 748 | "25% NaN \n", 749 | "50% NaN \n", 750 | "75% NaN \n", 751 | "max NaN " 752 | ] 753 | }, 754 | "metadata": {}, 755 | "output_type": "display_data" 756 | }, 757 | { 758 | "name": "stdout", 759 | "output_type": "stream", 760 | "text": [ 761 | "View the evaluation results for project 'docugami_kg_rag_zero_shot_e0d8d8e5-94b8-4fa8-829c-92ad7d3f9f4b' at:\n", 762 | "https://smith.langchain.com/o/530c4d06-5640-4c0f-94fe-0be7b769531f/datasets/a5db8a49-d0eb-4150-83e4-68bf08ad8ebf/compare?selectedSessions=e5cd6b66-4fdf-48e5-bae0-5699f73147ae\n", 763 | "\n", 764 | "View all tests for Dataset SEC 10Q Filings 2024-05-03 at:\n", 765 | "https://smith.langchain.com/o/530c4d06-5640-4c0f-94fe-0be7b769531f/datasets/a5db8a49-d0eb-4150-83e4-68bf08ad8ebf\n", 766 | "[> ] 0/9" 767 | ] 768 | }, 769 | { 770 | "name": "stderr", 771 | "output_type": "stream", 772 | "text": [ 773 | "Parent run 9258d0f6-2d6f-4c9b-86da-a34dca82b41a not found for run 15089bce-b23e-41c3-9b37-8ed703db9ed2. Treating as a root run.\n", 774 | "Parent run c033e3b4-cb93-4b5a-87f8-706a50d613a0 not found for run 9d4be14c-2c2f-4b7f-b8e7-503245a43a4d. Treating as a root run.\n" 775 | ] 776 | }, 777 | { 778 | "name": "stdout", 779 | "output_type": "stream", 780 | "text": [ 781 | "[----------> ] 2/9" 782 | ] 783 | }, 784 | { 785 | "name": "stderr", 786 | "output_type": "stream", 787 | "text": [ 788 | "Parent run 356cb38e-2410-468d-a616-745d4a14867f not found for run d0358ce8-0165-49f6-83e6-f20eb8588ab6. Treating as a root run.\n", 789 | "Parent run c397d7d2-7df5-4065-8ef8-69e94e6aaa9f not found for run d76ecea9-7cfe-42e1-95df-8aa56f19b4a4. Treating as a root run.\n" 790 | ] 791 | }, 792 | { 793 | "name": "stdout", 794 | "output_type": "stream", 795 | "text": [ 796 | "[---------------------> ] 4/9" 797 | ] 798 | }, 799 | { 800 | "name": "stderr", 801 | "output_type": "stream", 802 | "text": [ 803 | "Parent run f28d76e6-897f-4fe3-b39d-238882f1ddad not found for run f3f5f6a9-7c07-4aa7-af63-1c7cf8d15360. Treating as a root run.\n", 804 | "Parent run 7678a7f7-f140-4736-9590-3f9bf8337e6e not found for run 6a54b6ac-e1d7-49e4-9200-2d5a0e13c369. Treating as a root run.\n" 805 | ] 806 | }, 807 | { 808 | "name": "stdout", 809 | "output_type": "stream", 810 | "text": [ 811 | "[--------------------------------> ] 6/9" 812 | ] 813 | }, 814 | { 815 | "name": "stderr", 816 | "output_type": "stream", 817 | "text": [ 818 | "Parent run 6c67638f-b72f-496c-9b94-9db6d524ee36 not found for run 314e3c28-88bc-4501-bf56-d464efeab225. Treating as a root run.\n", 819 | "Parent run 41d0e40d-e158-4d25-9bb9-168b1e537224 not found for run bf310d5d-1d69-4626-83d8-42b0930d9231. 
Treating as a root run.\n" 820 | ] 821 | }, 822 | { 823 | "name": "stdout", 824 | "output_type": "stream", 825 | "text": [ 826 | "[-------------------------------------------> ] 8/9" 827 | ] 828 | }, 829 | { 830 | "name": "stderr", 831 | "output_type": "stream", 832 | "text": [ 833 | "Parent run 58e74066-a088-4972-a576-22197f970abf not found for run b33ff39f-9791-49c4-8539-37db00fbeb73. Treating as a root run.\n" 834 | ] 835 | }, 836 | { 837 | "name": "stdout", 838 | "output_type": "stream", 839 | "text": [ 840 | "[------------------------------------------------->] 9/9" 841 | ] 842 | }, 843 | { 844 | "data": { 845 | "text/html": [ 846 | "

Experiment Results:

" 847 | ], 848 | "text/plain": [ 849 | "" 850 | ] 851 | }, 852 | "metadata": {}, 853 | "output_type": "display_data" 854 | }, 855 | { 856 | "data": { 857 | "text/html": [ 858 | "
\n", 859 | "\n", 872 | "\n", 873 | " \n", 874 | " \n", 875 | " \n", 876 | " \n", 877 | " \n", 878 | " \n", 879 | " \n", 880 | " \n", 881 | " \n", 882 | " \n", 883 | " \n", 884 | " \n", 885 | " \n", 886 | " \n", 887 | " \n", 888 | " \n", 889 | " \n", 890 | " \n", 891 | " \n", 892 | " \n", 893 | " \n", 894 | " \n", 895 | " \n", 896 | " \n", 897 | " \n", 898 | " \n", 899 | " \n", 900 | " \n", 901 | " \n", 902 | " \n", 903 | " \n", 904 | " \n", 905 | " \n", 906 | " \n", 907 | " \n", 908 | " \n", 909 | " \n", 910 | " \n", 911 | " \n", 912 | " \n", 913 | " \n", 914 | " \n", 915 | " \n", 916 | " \n", 917 | " \n", 918 | " \n", 919 | " \n", 920 | " \n", 921 | " \n", 922 | " \n", 923 | " \n", 924 | " \n", 925 | " \n", 926 | " \n", 927 | " \n", 928 | " \n", 929 | " \n", 930 | " \n", 931 | " \n", 932 | " \n", 933 | " \n", 934 | " \n", 935 | " \n", 936 | " \n", 937 | " \n", 938 | " \n", 939 | " \n", 940 | " \n", 941 | " \n", 942 | " \n", 943 | " \n", 944 | " \n", 945 | " \n", 946 | " \n", 947 | " \n", 948 | " \n", 949 | " \n", 950 | " \n", 951 | " \n", 952 | " \n", 953 | " \n", 954 | " \n", 955 | " \n", 956 | " \n", 957 | " \n", 958 | " \n", 959 | " \n", 960 | " \n", 961 | "
feedback.correctnesserrorexecution_timerun_id
count9.00000009.0000009
uniqueNaN0NaN9
topNaNNaNNaNc78f8ea0-04de-4848-ba8e-24c152e5a750
freqNaNNaNNaN1
mean0.555556NaN1.883969NaN
std0.527046NaN0.221630NaN
min0.000000NaN1.631498NaN
25%0.000000NaN1.769093NaN
50%1.000000NaN1.820031NaN
75%1.000000NaN1.974672NaN
max1.000000NaN2.254725NaN
\n", 962 | "
" 963 | ], 964 | "text/plain": [ 965 | " feedback.correctness error execution_time \\\n", 966 | "count 9.000000 0 9.000000 \n", 967 | "unique NaN 0 NaN \n", 968 | "top NaN NaN NaN \n", 969 | "freq NaN NaN NaN \n", 970 | "mean 0.555556 NaN 1.883969 \n", 971 | "std 0.527046 NaN 0.221630 \n", 972 | "min 0.000000 NaN 1.631498 \n", 973 | "25% 0.000000 NaN 1.769093 \n", 974 | "50% 1.000000 NaN 1.820031 \n", 975 | "75% 1.000000 NaN 1.974672 \n", 976 | "max 1.000000 NaN 2.254725 \n", 977 | "\n", 978 | " run_id \n", 979 | "count 9 \n", 980 | "unique 9 \n", 981 | "top c78f8ea0-04de-4848-ba8e-24c152e5a750 \n", 982 | "freq 1 \n", 983 | "mean NaN \n", 984 | "std NaN \n", 985 | "min NaN \n", 986 | "25% NaN \n", 987 | "50% NaN \n", 988 | "75% NaN \n", 989 | "max NaN " 990 | ] 991 | }, 992 | "metadata": {}, 993 | "output_type": "display_data" 994 | }, 995 | { 996 | "name": "stdout", 997 | "output_type": "stream", 998 | "text": [ 999 | "View the evaluation results for project 'openai_assistant_retrieval_e0d8d8e5-94b8-4fa8-829c-92ad7d3f9f4b' at:\n", 1000 | "https://smith.langchain.com/o/530c4d06-5640-4c0f-94fe-0be7b769531f/datasets/a5db8a49-d0eb-4150-83e4-68bf08ad8ebf/compare?selectedSessions=d3fe0fbf-4e05-4e2d-ba06-31b4e72154c6\n", 1001 | "\n", 1002 | "View all tests for Dataset SEC 10Q Filings 2024-05-03 at:\n", 1003 | "https://smith.langchain.com/o/530c4d06-5640-4c0f-94fe-0be7b769531f/datasets/a5db8a49-d0eb-4150-83e4-68bf08ad8ebf\n", 1004 | "[---------------------------> ] 5/9" 1005 | ] 1006 | }, 1007 | { 1008 | "ename": "KeyboardInterrupt", 1009 | "evalue": "", 1010 | "output_type": "error", 1011 | "traceback": [ 1012 | "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", 1013 | "\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", 1014 | "Cell \u001b[0;32mIn[11], line 42\u001b[0m\n\u001b[1;32m 40\u001b[0m run_id \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m(uuid\u001b[38;5;241m.\u001b[39muuid4())\n\u001b[1;32m 41\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m project_name, agent \u001b[38;5;129;01min\u001b[39;00m agent_map\u001b[38;5;241m.\u001b[39mitems():\n\u001b[0;32m---> 42\u001b[0m \u001b[43mrun_eval\u001b[49m\u001b[43m(\u001b[49m\u001b[43magent\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mproject_name\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m_\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mrun_id\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 43\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[1;32m 44\u001b[0m \u001b[38;5;66;03m# Revert cache setting to global default\u001b[39;00m\n\u001b[1;32m 45\u001b[0m set_llm_cache(cache)\n", 1015 | "Cell \u001b[0;32mIn[11], line 16\u001b[0m, in \u001b[0;36mrun_eval\u001b[0;34m(eval_func, eval_run_name)\u001b[0m\n\u001b[1;32m 12\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;124;03mRun eval\u001b[39;00m\n\u001b[1;32m 14\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 15\u001b[0m client \u001b[38;5;241m=\u001b[39m Client()\n\u001b[0;32m---> 16\u001b[0m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrun_on_dataset\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 17\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mdataset_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mEVAL_NAME\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 18\u001b[0m \u001b[43m \u001b[49m\u001b[43mllm_or_chain_factory\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43meval_func\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 19\u001b[0m \u001b[43m \u001b[49m\u001b[43mevaluation\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43meval_config\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 20\u001b[0m \u001b[43m \u001b[49m\u001b[43mverbose\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 21\u001b[0m \u001b[43m \u001b[49m\u001b[43mproject_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43meval_run_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 22\u001b[0m \u001b[43m \u001b[49m\u001b[43mconcurrency_level\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m2\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Reduced to help with rate limits, but will take longer\u001b[39;49;00m\n\u001b[1;32m 23\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", 1016 | "File \u001b[0;32m~/Source/github/langchain-template-docugami-kg-rag/.venv/lib/python3.9/site-packages/langsmith/client.py:4255\u001b[0m, in \u001b[0;36mClient.run_on_dataset\u001b[0;34m(self, dataset_name, llm_or_chain_factory, evaluation, concurrency_level, project_name, project_metadata, dataset_version, verbose, input_mapper, revision_id, **kwargs)\u001b[0m\n\u001b[1;32m 4250\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m:\n\u001b[1;32m 4251\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m(\n\u001b[1;32m 4252\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe client.run_on_dataset function requires the langchain\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 4253\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpackage to run.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mInstall with pip install langchain\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 4254\u001b[0m )\n\u001b[0;32m-> 4255\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_run_on_dataset\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 4256\u001b[0m \u001b[43m \u001b[49m\u001b[43mdataset_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdataset_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4257\u001b[0m \u001b[43m \u001b[49m\u001b[43mllm_or_chain_factory\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mllm_or_chain_factory\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4258\u001b[0m \u001b[43m \u001b[49m\u001b[43mconcurrency_level\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconcurrency_level\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4259\u001b[0m \u001b[43m \u001b[49m\u001b[43mclient\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4260\u001b[0m \u001b[43m \u001b[49m\u001b[43mevaluation\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mevaluation\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4261\u001b[0m \u001b[43m \u001b[49m\u001b[43mproject_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproject_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4262\u001b[0m \u001b[43m \u001b[49m\u001b[43mproject_metadata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproject_metadata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4263\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mverbose\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mverbose\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4264\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_mapper\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minput_mapper\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4265\u001b[0m \u001b[43m \u001b[49m\u001b[43mrevision_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4266\u001b[0m \u001b[43m \u001b[49m\u001b[43mdataset_version\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdataset_version\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4267\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4268\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", 1017 | "File \u001b[0;32m~/Source/github/langchain-template-docugami-kg-rag/.venv/lib/python3.9/site-packages/langchain/smith/evaluation/runner_utils.py:1399\u001b[0m, in \u001b[0;36mrun_on_dataset\u001b[0;34m(client, dataset_name, llm_or_chain_factory, evaluation, dataset_version, concurrency_level, project_name, project_metadata, verbose, revision_id, **kwargs)\u001b[0m\n\u001b[1;32m 1397\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1398\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m runnable_config\u001b[38;5;241m.\u001b[39mget_executor_for_config(container\u001b[38;5;241m.\u001b[39mconfigs[\u001b[38;5;241m0\u001b[39m]) \u001b[38;5;28;01mas\u001b[39;00m executor:\n\u001b[0;32m-> 1399\u001b[0m batch_results \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mlist\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1400\u001b[0m \u001b[43m \u001b[49m\u001b[43mexecutor\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmap\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1401\u001b[0m \u001b[43m \u001b[49m\u001b[43mfunctools\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpartial\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1402\u001b[0m \u001b[43m \u001b[49m\u001b[43m_run_llm_or_chain\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1403\u001b[0m \u001b[43m \u001b[49m\u001b[43mllm_or_chain_factory\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcontainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwrapped_model\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1404\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_mapper\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minput_mapper\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1405\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1406\u001b[0m \u001b[43m \u001b[49m\u001b[43mcontainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexamples\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1407\u001b[0m \u001b[43m \u001b[49m\u001b[43mcontainer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mconfigs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1408\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1409\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1411\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m container\u001b[38;5;241m.\u001b[39mfinish(batch_results, verbose\u001b[38;5;241m=\u001b[39mverbose)\n", 1018 | "File \u001b[0;32m/usr/lib/python3.9/concurrent/futures/_base.py:609\u001b[0m, in \u001b[0;36mExecutor.map..result_iterator\u001b[0;34m()\u001b[0m\n\u001b[1;32m 606\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m fs:\n\u001b[1;32m 607\u001b[0m \u001b[38;5;66;03m# Careful not to keep a reference to the popped 
future\u001b[39;00m\n\u001b[1;32m 608\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m timeout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 609\u001b[0m \u001b[38;5;28;01myield\u001b[39;00m \u001b[43mfs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpop\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresult\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 610\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 611\u001b[0m \u001b[38;5;28;01myield\u001b[39;00m fs\u001b[38;5;241m.\u001b[39mpop()\u001b[38;5;241m.\u001b[39mresult(end_time \u001b[38;5;241m-\u001b[39m time\u001b[38;5;241m.\u001b[39mmonotonic())\n", 1019 | "File \u001b[0;32m/usr/lib/python3.9/concurrent/futures/_base.py:441\u001b[0m, in \u001b[0;36mFuture.result\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 438\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_state \u001b[38;5;241m==\u001b[39m FINISHED:\n\u001b[1;32m 439\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__get_result()\n\u001b[0;32m--> 441\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_condition\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwait\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 443\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_state \u001b[38;5;129;01min\u001b[39;00m [CANCELLED, CANCELLED_AND_NOTIFIED]:\n\u001b[1;32m 444\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m CancelledError()\n", 1020 | "File \u001b[0;32m/usr/lib/python3.9/threading.py:312\u001b[0m, in \u001b[0;36mCondition.wait\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m 310\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m: \u001b[38;5;66;03m# restore state no matter what (e.g., KeyboardInterrupt)\u001b[39;00m\n\u001b[1;32m 311\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m timeout \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m--> 312\u001b[0m \u001b[43mwaiter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43macquire\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 313\u001b[0m gotit \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 314\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", 1021 | "\u001b[0;31mKeyboardInterrupt\u001b[0m: " 1022 | ] 1023 | } 1024 | ], 1025 | "source": [ 1026 | "import uuid\n", 1027 | "from langsmith.client import Client\n", 1028 | "from langchain.smith import RunEvalConfig\n", 1029 | "from langchain.globals import set_llm_cache, get_llm_cache\n", 1030 | "\n", 1031 | "eval_config = RunEvalConfig(\n", 1032 | " evaluators=[\"qa\"],\n", 1033 | ")\n", 1034 | "\n", 1035 | "\n", 1036 | "def run_eval(eval_func, eval_run_name):\n", 1037 | " \"\"\"\n", 1038 | " Run eval\n", 1039 | " \"\"\"\n", 1040 | " client = Client()\n", 1041 | " client.run_on_dataset(\n", 1042 | " dataset_name=EVAL_NAME,\n", 1043 | " llm_or_chain_factory=eval_func,\n", 1044 | " evaluation=eval_config,\n", 1045 | " verbose=True,\n", 1046 | " project_name=eval_run_name,\n", 1047 | " concurrency_level=2, # Reduced to help with rate limits, but will take longer\n", 1048 | " )\n", 1049 | "\n", 1050 | "\n", 1051 | "# Experiments\n", 1052 | "agent_map = {\n", 1053 | " \"docugami_kg_rag_zero_shot\": predict_docugami_agent,\n", 1054 | " \"openai_assistant_retrieval\": 
predict_openai_agent,\n", 1055 | "}\n", 1056 | "\n", 1057 | "try:\n", 1058 | " # Disable global cache setting to get fresh results every time for all experiments\n", 1059 | " # since no caching or temperature-0 is supported for the openai assistants API and\n", 1060 | " # we want to measure under similar conditions\n", 1061 | " cache = get_llm_cache()\n", 1062 | " set_llm_cache(None)\n", 1063 | "\n", 1064 | " for i in range(PER_EXPERIMENT_RUN_COUNT):\n", 1065 | " run_id = str(uuid.uuid4())\n", 1066 | " for project_name, agent in agent_map.items():\n", 1067 | " run_eval(agent, project_name + \"_\" + run_id)\n", 1068 | "finally:\n", 1069 | " # Revert cache setting to global default\n", 1070 | " set_llm_cache(cache)" 1071 | ] 1072 | }, 1073 | { 1074 | "cell_type": "code", 1075 | "execution_count": null, 1076 | "metadata": {}, 1077 | "outputs": [], 1078 | "source": [] 1079 | } 1080 | ], 1081 | "metadata": { 1082 | "kernelspec": { 1083 | "display_name": "app-sMPCFT4i-py3.9", 1084 | "language": "python", 1085 | "name": "python3" 1086 | }, 1087 | "language_info": { 1088 | "codemirror_mode": { 1089 | "name": "ipython", 1090 | "version": 3 1091 | }, 1092 | "file_extension": ".py", 1093 | "mimetype": "text/x-python", 1094 | "name": "python", 1095 | "nbconvert_exporter": "python", 1096 | "pygments_lexer": "ipython3", 1097 | "version": "3.9.16" 1098 | } 1099 | }, 1100 | "nbformat": 4, 1101 | "nbformat_minor": 2 1102 | } 1103 | --------------------------------------------------------------------------------