├── .env.sample ├── .github └── workflows │ └── main.yml ├── .gitignore ├── CHANGELOG.md ├── README.md ├── docs ├── api.md ├── index.md ├── installation.md └── tutorial.md ├── examples ├── assets │ ├── specialists.csv │ └── specialists.json ├── create_graph_from_csv.ipynb ├── create_graph_from_questions.ipynb └── create_graph_from_schema.ipynb ├── mkdocs.yml ├── pyproject.toml ├── src └── whyhow │ ├── __init__.py │ ├── apis │ ├── __init__.py │ ├── base.py │ └── graph.py │ ├── client.py │ ├── exceptions.py │ ├── py.typed │ └── schemas │ ├── __init__.py │ ├── base.py │ ├── common.py │ └── graph.py └── tests ├── apis └── test_graph.py ├── conftest.py ├── schemas └── test_common.py ├── test_client.py └── test_dummy.py /.env.sample: -------------------------------------------------------------------------------- 1 | # .env.example 2 | # Copy this file as '.env' and fill in the values as described below. 3 | WHYHOW_API_KEY="" 4 | 5 | # OpenAI API Key: A string containing your OpenAI API key for accessing their services. 6 | OPENAI_API_KEY="" 7 | 8 | # Pinecone API Key: A string with your Pinecone API key. 9 | PINECONE_API_KEY="" 10 | 11 | # Neo4J URI: The URI for connecting to your Neo4J database instance. 12 | NEO4J_URI="" 13 | 14 | # Neo4J User: The username for Neo4J database authentication. 15 | NEO4J_USER="" 16 | 17 | # Neo4J Password: The password for Neo4J database authentication. 
18 | NEO4J_PASSWORD="" -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: all 2 | 3 | on: 4 | pull_request: 5 | push: 6 | branches: [main] 7 | 8 | jobs: 9 | build: 10 | 11 | runs-on: ${{ matrix.os }} 12 | strategy: 13 | matrix: 14 | os: [ubuntu-latest] 15 | python-version: ['3.10'] 16 | 17 | steps: 18 | - uses: actions/checkout@v2 19 | 20 | - name: Set up Python ${{ matrix.python-version }} 21 | uses: actions/setup-python@v2 22 | with: 23 | python-version: ${{ matrix.python-version }} 24 | 25 | - name: Install Python dependencies 26 | run: | 27 | python -m pip install --upgrade pip 28 | pip install -e .[dev] 29 | 30 | - name: Lint with flake8 31 | run: | 32 | flake8 src tests examples 33 | 34 | - name: Check style with black 35 | run: | 36 | black src tests examples 37 | 38 | - name: Run security check 39 | run: | 40 | bandit -qr -c pyproject.toml src examples 41 | 42 | - name: Run import check 43 | run: | 44 | isort --check src tests examples 45 | 46 | - name: Run mypy 47 | run: | 48 | mypy src 49 | 50 | - name: Test with pytest 51 | run: | 52 | pytest --color=yes 53 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | .DS_Store 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject 
date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | cover/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | .pybuilder/ 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | # For a library or package, you might want to ignore these files since the code is 88 | # intended to run in multiple environments; otherwise, check them in: 89 | # .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # poetry 99 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 100 | # This is especially recommended for binary packages to ensure reproducibility, and is more 101 | # commonly ignored for libraries. 102 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 103 | #poetry.lock 104 | 105 | # pdm 106 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
107 | #pdm.lock 108 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 109 | # in version control. 110 | # https://pdm.fming.dev/#use-with-ide 111 | .pdm.toml 112 | 113 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 114 | __pypackages__/ 115 | 116 | # Celery stuff 117 | celerybeat-schedule 118 | celerybeat.pid 119 | 120 | # SageMath parsed files 121 | *.sage.py 122 | 123 | # Environments 124 | .env 125 | .venv 126 | env/ 127 | venv/ 128 | ENV/ 129 | env.bak/ 130 | venv.bak/ 131 | 132 | # Spyder project settings 133 | .spyderproject 134 | .spyproject 135 | 136 | # Rope project settings 137 | .ropeproject 138 | 139 | # mkdocs documentation 140 | /site 141 | 142 | # mypy 143 | .mypy_cache/ 144 | .dmypy.json 145 | dmypy.json 146 | 147 | # Pyre type checker 148 | .pyre/ 149 | 150 | # pytype static type analyzer 151 | .pytype/ 152 | 153 | # Cython debug symbols 154 | cython_debug/ 155 | 156 | # PyCharm 157 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 158 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 159 | # and can be added to the global gitignore or merged into this file. For a more nuclear 160 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 161 | #.idea/ 162 | 163 | data/ 164 | .python-version -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
7 | 8 | ## [Unreleased] 9 | 10 | ### Added 11 | - Add all JSON only endpoints 12 | - Anticipate all types 13 | - Add schemas 14 | - Minimal package structure + CI 15 | 16 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # (Deprecated) WhyHow Knowledge Graph Creation SDK 2 | 3 | 4 | > [!CAUTION] 5 | > This repo has been **deprecated** and is no longer supported. Instead please see the [Knowledge Graph Studio](https://github.com/whyhow-ai/knowledge-graph-studio). 6 | > We are focusing on KG orchestration and management through our KG Studio. The Knowledge Graph Studio has been open-sourced here - https://github.com/whyhow-ai/knowledge-graph-studio. 7 | 8 | 9 | [![Python Version](https://img.shields.io/badge/python-3.10%2B-blue)](https://www.python.org/downloads/) 10 | [![License](https://img.shields.io/badge/license-MIT-green)](https://opensource.org/licenses/MIT) 11 | [![PyPI Version](https://img.shields.io/pypi/v/whyhow)](https://pypi.org/project/whyhow/) 12 | [![Code Style: Black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) 13 | [![Checked with mypy](https://img.shields.io/badge/mypy-checked-blue)](https://mypy-lang.org/) 14 | [![Whyhow Discord](https://dcbadge.vercel.app/api/server/9bWqrsxgHr?compact=true&style=flat)](https://discord.gg/9bWqrsxgHr) 15 | 16 | The WhyHow Knowledge Graph Creation SDK enables you to quickly and easily build automated knowledge graphs tailored to your unique worldview. Instantly build, extend, and query well-scoped KGs with your data. 
17 | 18 | 19 | # Installation 20 | 21 | ## Prerequisites 22 | 23 | - Python 3.10 or higher 24 | - [OpenAI API key](https://openai.com/) 25 | - [Pinecone API key](https://www.pinecone.io/) 26 | - [Neo4j credentials](https://neo4j.com/cloud/platform/aura-graph-database/) (username, password, and URL) 27 | 28 | ## Install from PyPI 29 | 30 | You can install the SDK directly from PyPI using pip: 31 | 32 | ```shell 33 | pip install whyhow 34 | 35 | # For OpenAI 36 | export OPENAI_API_KEY= 37 | 38 | # For Azure OpenAI 39 | export AZURE_OPENAI_API_KEY= 40 | export AZURE_OPENAI_API_VERSION= 41 | export AZURE_OPENAI_ENDPOINT= 42 | export AZURE_OPENAI_MODEL_NAME= 43 | export AZURE_OPENAI_EMBEDDING_NAME= 44 | 45 | export PINECONE_API_KEY= 46 | export NEO4J_URL= 47 | export NEO4J_USER= 48 | export NEO4J_PASSWORD= 49 | ``` 50 | 51 | ## Install from Github 52 | 53 | Alternatively, you can clone the repo and install the package 54 | 55 | ```shell 56 | 57 | git clone git@github.com:whyhow-ai/whyhow.git 58 | cd whyhow 59 | pip install . 60 | 61 | # For OpenAI 62 | export OPENAI_API_KEY= 63 | 64 | # For Azure OpenAI 65 | export AZURE_OPENAI_API_KEY= 66 | export AZURE_OPENAI_API_VERSION= 67 | export AZURE_OPENAI_ENDPOINT= 68 | 69 | export PINECONE_API_KEY= 70 | export NEO4J_URL= 71 | export NEO4J_USER= 72 | export NEO4J_PASSWORD= 73 | ``` 74 | 75 | # Examples 76 | 77 | Navigate to the `examples/`. 78 | 79 | # How to 80 | 81 | ## Initialize SDK 82 | 83 | Import the SDK and initialize the client using your WhyHow API key. 
84 | 85 | ```shell 86 | from whyhow import WhyHow 87 | 88 | client = WhyHow(api_key=) 89 | ``` 90 | 91 | For Azure Open AI: 92 | 93 | ```shell 94 | from whyhow import WhyHow 95 | 96 | client = WhyHow(api_key=, use_azure=True) 97 | ``` 98 | 99 | For an alternative model (for example, healthcare for text extraction): 100 | 101 | ```shell 102 | from whyhow import WhyHow 103 | 104 | client = WhyHow(api_key=, model_type='health') 105 | ``` 106 | 107 | ## Add documents to namespace 108 | 109 | Your namespace is a logical grouping of the raw data you upload, the seed concepts you define, and the graphs you create. Namespaces are meant to be tightly scoped to your use case. You can create as many namespaces as you want. 110 | 111 | ```shell 112 | 113 | namespace = "harry-potter" 114 | documents = ["files/harry_potter_and_the_philosophers_stone.pdf","files/harry_potter_and_the_chamber_of_secrets.pdf"] 115 | 116 | documents_response = client.graph.add_documents(namespace, documents) 117 | print(documents_response) 118 | # Adding your documents 119 | 120 | ``` 121 | 122 | ## Create a graph 123 | 124 | You can create a graph in three different ways. First, you can create a graph using a user-defined schema, giving you complete control over the types of entities and relationships that are extracted and used to build the graph. You can also create a graph using a set of seed questions. In this case, WhyHow will automatically extract entities and relationships that are most applicable to the things you want to know, and construct a graph from these concepts. Or, you can fully deterministically create a graph from structured context in the form of a CSV. 125 | 126 | Create graph with **schema** if... 127 | 128 | 1. Your graph must adhere to a consistent structure. 129 | 2. You are very familiar with the structure of your raw documents. 130 | 3. You need comprehensive extraction of concepts across the entire document. 131 | 132 | Create graph with **seed questions** if... 
133 | 134 | 1. You are unsure as to which relationships and patterns you'd like to build into your graph. 135 | 2. You want to build your graph with only the most semantically similar raw data. 136 | 137 | Create graph with **csv** if... 138 | 139 | 1. You already know the structure of your data. 140 | 2. You already have data stored in a table format. 141 | 142 | ### Create a graph with schema 143 | 144 | Tell the WhyHow SDK exactly which entities, relationships, and patterns you'd like to extract and build into your graph by defining them in a JSON-based schema. 145 | 146 | ```shell 147 | 148 | #schema.json 149 | 150 | { 151 | "entities": [ 152 | { 153 | "name": "character", 154 | "description": "A person appearing in the book, e.g., Harry Potter, Ron Weasley, Hermione Granger, Albus Dumbledore." 155 | }, 156 | { 157 | "name": "object", 158 | "description": "Inanimate items that characters use or interact with, e.g., wand, Philosopher's Stone, Invisibility Cloak, broomstick." 159 | } 160 | ... 161 | ], 162 | "relations": [ 163 | { 164 | "name": "friends with", 165 | "description": "Denotes a friendly relationship between characters." 166 | }, 167 | { 168 | "name": "interacts with", 169 | "description": "Describes a scenario in which a character engages with another character, creature, or object." 170 | }, 171 | ... 172 | ], 173 | "patterns": [ 174 | { 175 | "head": "character", 176 | "relation": "friends with", 177 | "tail": "character", 178 | "description": "One character is friends with another, e.g., Harry Potter is friends with Ron Weasley." 179 | }, 180 | { 181 | "head": "character", 182 | "relation": "interacts with", 183 | "tail": "object", 184 | "description": "A character interacting with an object, e.g., Harry Potter interacts with the Invisibility Cloak." 185 | } 186 | ] 187 | } 188 | 189 | ``` 190 | 191 | Using this schema, we extract relevant concepts from your raw data, construct triples, and generate a graph according to the patterns you define. 
192 | 193 | ```shell 194 | # Create graph from schema 195 | 196 | schema = "files/schema.json" 197 | create_graph_with_schema_response = client.graph.create_graph_from_schema(namespace, schema) 198 | print(create_graph_with_schema_response) 199 | # Creating your graph 200 | 201 | ``` 202 | 203 | ### Create a graph with seed questions 204 | 205 | Tell the WhyHow SDK what you care about by providing a list of concepts in the form of natural language questions. Using these questions, we create a small ontology to guide extraction of entities and relationships that are most relevant to your use case, then construct a graph. 206 | 207 | ```shell 208 | 209 | questions = ["What does Harry wear?","Who is Harry friends with?"] 210 | 211 | create_graph_response = client.graph.create_graph(namespace, questions) 212 | print(create_graph_response) 213 | # Creating your graph 214 | 215 | ``` 216 | 217 | ### Create a graph with a csv 218 | 219 | Provide a CSV and a schema (or automatically generate one using the `generate_schema` method) to create a graph. WhyHow will automatically extract entities and relationships from your CSV headers and data. 220 | 221 | ```shell 222 | 223 | namespace = "specialists" 224 | documents = ["../examples/assets/specialists.csv"] 225 | schema_file = "../examples/assets/specialists.json" 226 | 227 | # Automatically generate a schema 228 | schema = client.graph.generate_schema(documents=documents) 229 | 230 | # Create a graph from a CSV and the schema you bring/build 231 | csv_graph = client.graph.create_graph_from_csv( 232 | namespace=namespace, schema_file=schema_file 233 | ) 234 | 235 | # Query the graph created from csv using specific entities and relations 236 | query = "Who speaks English and live in Houston?" 
237 | entities = ["English","Houston"] 238 | relations = ["SPEAKS","LIVE_IN"] 239 | 240 | specific_query_response = client.graph.query_graph_specific( 241 | namespace=namespace, 242 | query=query, 243 | entities=entities, 244 | relations=relations, 245 | include_triples=False, 246 | include_chunks=False, 247 | ) 248 | 249 | ``` 250 | 251 | ### Support 252 | 253 | WhyHow.AI is building tools to help developers bring more determinism and control to their RAG pipelines using graph structures. If you're thinking about, in the process of, or have already incorporated knowledge graphs in RAG, we’d love to chat at team@whyhow.ai, or follow our newsletter at [WhyHow.AI](https://www.whyhow.ai/). Join our discussions about rules, determinism and knowledge graphs in RAG on our [Discord](https://discord.com/invite/9bWqrsxgHr). 254 | 255 | We appreciate your interest. 256 | -------------------------------------------------------------------------------- /docs/api.md: -------------------------------------------------------------------------------- 1 | Here's the generated `api.md` file for your mkdocs based on the provided code files: 2 | 3 | ```markdown 4 | # API Reference 5 | 6 | This document provides a reference for the WhyHow API, which allows you to interact with the graph functionality. 7 | 8 | ## GraphAPI 9 | 10 | The `GraphAPI` class provides methods to interact with the graph API synchronously. 11 | 12 | ### `add_documents` 13 | 14 | ```python 15 | def add_documents(self, namespace: str, documents: list[str]) -> str 16 | ``` 17 | 18 | Add documents to the graph. 19 | 20 | #### Parameters 21 | 22 | - `namespace` (str): The namespace of the graph. 23 | - `documents` (list[str]): The documents to add. 24 | 25 | #### Returns 26 | 27 | - (str): The response message. 
28 | 29 | #### Raises 30 | 31 | - `ValueError`: If no documents are provided, a document does not exist, a document is not a PDF (only PDFs are supported), a PDF is too large (limit: 8MB), or too many documents are provided (limit: 3 files during the beta). 32 | 33 | ### `generate_schema` 34 | 35 | ```python 36 | def generate_schema(self, documents: list[str]) -> str 37 | ``` 38 | Generate a schema from a CSV document. 39 | 40 | #### Parameters 41 | 42 | - `documents` (list[str]): The CSV documents to generate the schema from. 43 | 44 | #### Returns 45 | 46 | - (str): The generated schema as a JSON string. 47 | 48 | #### Raises 49 | 50 | - `ValueError`: If no documents are provided, not all documents exist, or the documents are not in CSV format. 51 | 52 | ### `create_graph` 53 | 54 | ```python 55 | def create_graph(self, namespace: str, questions: list[str]) -> str 56 | ``` 57 | 58 | Create a new graph. 59 | 60 | #### Parameters 61 | 62 | - `namespace` (str): The namespace of the graph to create. 63 | - `questions` (list[str]): The seed concepts to initialize the graph with. 64 | 65 | #### Returns 66 | 67 | - (str): The response message. 68 | 69 | #### Raises 70 | 71 | - `ValueError`: If no questions are provided. 72 | 73 | ### `create_graph_from_schema` 74 | 75 | ```python 76 | def create_graph_from_schema(self, namespace: str, schema_file: str) -> str 77 | ``` 78 | 79 | Create a new graph based on a user-defined schema. 80 | 81 | #### Parameters 82 | 83 | - `namespace` (str): The namespace of the graph to create. 84 | - `schema_file` (str): The schema file to use to build the graph. 85 | 86 | #### Returns 87 | 88 | - (str): The response message. 89 | 90 | #### Raises 91 | 92 | - `ValueError`: If no schema is provided. 93 | 94 | ### `create_graph_from_csv` 95 | 96 | ```python 97 | def create_graph_from_csv(self, namespace: str, schema_file: str) -> str 98 | ``` 99 | 100 | Create a new graph using a CSV based on a user-defined schema. 
101 | 102 | #### Parameters 103 | 104 | - `namespace` (str): The namespace of the graph to create. 105 | - `schema_file` (str): The schema file to use to build the graph. 106 | 107 | #### Returns 108 | 109 | - (str): The response message. 110 | 111 | #### Raises 112 | 113 | - `ValueError`: If no schema is provided or if the schema contains invalid property column names. 114 | 115 | ### `query_graph` 116 | 117 | ```python 118 | def query_graph(self, namespace: str, query: str, include_triples: bool = False, include_chunks: bool = False) -> QueryGraphReturn 119 | ``` 120 | 121 | Query the graph. 122 | 123 | #### Parameters 124 | 125 | - `namespace` (str): The namespace of the graph. 126 | - `query` (str): The query to run. 127 | - `include_triples` (bool): Include the triples used in the return. 128 | - `include_chunks` (bool): Include the chunk ids and chunk text in the return. 129 | 130 | #### Returns 131 | 132 | - (`QueryGraphReturn`): The answer, triples (optional), and chunks (optional). 133 | 134 | ### `query_graph_specific` 135 | 136 | ```python 137 | def query_graph_specific( 138 | self, 139 | namespace: str, 140 | query: str, 141 | entities: list[str] = [], 142 | relations: list[str] = [], 143 | include_triples: bool = False, 144 | include_chunks: bool = False, 145 | ) -> SpecificQueryGraphResponse 146 | ``` 147 | 148 | Query the graph with specific entities and relations. 149 | 150 | #### Parameters 151 | 152 | - `namespace` (str): The namespace of the graph. 153 | - `query` (str): The query to run. 154 | - `entities` (list[str]): The entities to query. 155 | - `relations` (list[str]): The relations to query. 156 | - `include_triples` (bool): Whether to include triples in the response. 157 | - `include_chunks` (bool): Whether to include chunks in the response. 158 | 159 | #### Returns 160 | 161 | - (`SpecificQueryGraphResponse`): The namespace, answer, triples, and chunks. 
162 | 163 | ## Schemas 164 | 165 | The WhyHow API uses Pydantic models to define the request and response schemas. 166 | 167 | ### `AddDocumentsResponse` 168 | 169 | ```python 170 | class AddDocumentsResponse(BaseResponse): 171 | """Schema for the response body of the add documents endpoint.""" 172 | 173 | namespace: str 174 | message: str 175 | ``` 176 | 177 | ### `CreateQuestionGraphRequest` 178 | 179 | ```python 180 | class CreateQuestionGraphRequest(BaseRequest): 181 | """Schema for the request body of the create graph endpoint.""" 182 | 183 | questions: list[str] 184 | ``` 185 | 186 | ### `CreateSchemaGraphRequest` 187 | 188 | ```python 189 | class CreateSchemaGraphRequest(BaseRequest): 190 | """Schema for the request body of the create graph endpoint.""" 191 | 192 | graph_schema: SchemaModel 193 | ``` 194 | 195 | ### `CreateGraphResponse` 196 | 197 | ```python 198 | class CreateGraphResponse(BaseResponse): 199 | """Schema for the response body of the create graph endpoint.""" 200 | 201 | namespace: str 202 | message: str 203 | ``` 204 | 205 | ### `QueryGraphRequest` 206 | 207 | ```python 208 | class QueryGraphRequest(BaseRequest): 209 | """Schema for the request body of the query graph endpoint.""" 210 | 211 | query: str 212 | ``` 213 | 214 | ### `QueryGraphResponse` 215 | 216 | ```python 217 | class QueryGraphResponse(BaseResponse): 218 | """Schema for the response body of the query graph endpoint.""" 219 | 220 | namespace: str 221 | answer: str 222 | include_triples: bool = False 223 | include_chunks: bool = False 224 | ``` 225 | 226 | ### `QueryGraphReturn` 227 | 228 | ```python 229 | class QueryGraphReturn(BaseReturn): 230 | """Schema for the return value of the query graph endpoint.""" 231 | 232 | answer: str 233 | ``` 234 | 235 | ## Base Classes 236 | 237 | The WhyHow API uses the following base classes for the API schemas: 238 | 239 | ### `APIBase` 240 | 241 | ```python 242 | class APIBase(BaseModel, ABC): 243 | """Base class for API schemas.""" 244 | 
245 | model_config = ConfigDict(arbitrary_types_allowed=True) 246 | 247 | client: Client 248 | prefix: str = "" 249 | ``` 250 | 251 | ### `AsyncAPIBase` 252 | 253 | ```python 254 | class AsyncAPIBase(BaseModel, ABC): 255 | """Base class for async API schemas.""" 256 | 257 | model_config = ConfigDict(arbitrary_types_allowed=True) 258 | 259 | client: AsyncClient 260 | prefix: str = "" 261 | ``` 262 | ``` 263 | 264 | This `api.md` file provides an overview of the `GraphAPI` class and its methods, along with the request and response schemas used by the API. It also includes information about the base classes used for the API schemas. 265 | 266 | You can include this file in your mkdocs documentation to provide a reference for the WhyHow API. -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # Welcome to the WhyHow Automated Knowledge Graph Creation SDK Documentation 2 | 3 | ![Python 3.10](https://img.shields.io/badge/python-3.10-blue.svg) 4 | ![Python 3.11](https://img.shields.io/badge/python-3.11-blue.svg) 5 | 6 | The WhyHow Knowledge Graph Creation SDK enables you to quickly and easily build automated knowledge graphs tailored to your unique worldview. Instantly build, extend, and query well-scoped KGs using a raw PDF and simple seed concepts in natural language. 7 | 8 | ## Key Features 9 | 10 | - Instantly build knowledge graphs using your documents and seed concepts (currently supports PDF files) 11 | - Simple API for querying your knowledge graphs using natural language 12 | - Bring your own keys for OpenAI, Pinecone, and Neo4j 13 | 14 | ## Getting Started 15 | 16 | 1. Install the package by following the [Installation Guide](installation.md) 17 | 2. Set up your OpenAI, Pinecone, and Neo4j credentials as environment variables 18 | 3. Initialize the SDK with your WhyHow API key 19 | 4. 
Create a namespace and add raw documents using `graph.add_documents()` 20 | 5. Create a graph for the namespace using `graph.create_graph()` using a list of seed concepts 21 | 6. Query the graph with natural language using `graph.query_graph()` 22 | 23 | For a detailed walkthrough and code examples, check out the [Tutorial](tutorial.md). 24 | -------------------------------------------------------------------------------- /docs/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | ## Prerequisites 4 | 5 | - Python 3.10 or higher 6 | - OpenAI API key 7 | - Pinecone API key 8 | - Neo4j credentials (username, password, and URL) 9 | 10 | ## Install from PyPI 11 | 12 | You can install the SDK directly from PyPI using pip: 13 | 14 | ```shell 15 | pip install whyhow 16 | 17 | export OPENAI_API_KEY= 18 | export PINECONE_API_KEY= 19 | export NEO4J_URL= 20 | export NEO4J_USER= 21 | export NEO4J_PASSWORD= 22 | ``` 23 | 24 | If using Azure OpenAI: 25 | 26 | ```shell 27 | pip install whyhow 28 | 29 | export AZURE_OPENAI_API_KEY= 30 | export AZURE_OPENAI_API_VERSION= 31 | export AZURE_OPENAI_ENDPOINT= 32 | export AZURE_OPENAI_MODEL_NAME= 33 | export AZURE_OPENAI_EMBEDDING_NAME= 34 | export PINECONE_API_KEY= 35 | export NEO4J_URL= 36 | export NEO4J_USER= 37 | export NEO4J_PASSWORD= 38 | ``` 39 | ## Install from Github 40 | 41 | Alternatively, you can clone the repo and install the package 42 | 43 | ```shell 44 | 45 | git clone git@github.com:whyhow-ai/whyhow.git 46 | cd whyhow 47 | pip install . 48 | 49 | export OPENAI_API_KEY= 50 | export PINECONE_API_KEY= 51 | export NEO4J_URL= 52 | export NEO4J_USER= 53 | export NEO4J_PASSWORD= 54 | ``` 55 | 56 | ## Troubleshooting 57 | 58 | If you encounter any issues during installation, please check the following: 59 | 60 | - Ensure that you have Python 3.10 or higher installed. You can check your Python version by running `python --version` in your terminal. 
61 | - Make sure that you have correctly set the `OPENAI_API_KEY`, `PINECONE_API_KEY`, `NEO4J_URL`, `NEO4J_USER`, `NEO4J_PASSWORD` environment variables with your respective credentials. 62 | - If you are installing from GitHub, ensure that you have cloned the repository correctly and are in the right directory. 63 | - If you are using a virtual environment, make sure that it is activated before running the installation commands. 64 | - If you still face problems, please open an issue on the GitHub repository with detailed information about the error and your environment setup. 65 | -------------------------------------------------------------------------------- /docs/tutorial.md: -------------------------------------------------------------------------------- 1 | # Tutorial 2 | 3 | This is a straightforward tutorial on how to build and query a knowledge graph using PDF texts of Harry Potter books using WhyHow SDK. This example will guide you through importing documents from the Harry Potter series into the knowledge graph, then querying it for insights related to the series. 4 | 5 | ## Environment Setup 6 | 7 | Ensure you have the following: 8 | 9 | - Python 3.10 or higher 10 | - OpenAI API key 11 | - Pinecone API key 12 | - Neo4j credentials (username, password, and URL) 13 | 14 | To keep your API keys and credentials secure, set them as environment variables. Open your terminal and run the following commands, substituting the placeholders with your actual data: 15 | 16 | ```shell 17 | export WHYHOW_API_KEY= 18 | export PINECONE_API_KEY= 19 | export OPENAI_API_KEY= 20 | export NEO4J_USER= 21 | export NEO4J_PASSWORD= 22 | export NEO4J_URL= 23 | ``` 24 | 25 | ## Install WhyHow SDK 26 | 27 | If you haven't already, install the `WhyHow SDK` using pip: 28 | 29 | ```shell 30 | pip install whyhow 31 | ``` 32 | 33 | ## Configure the WhyHow Client 34 | 35 | With your environment variables set, you can now configure the `WhyHow` client in your Python script. 
The client will automatically read in your environment variables, or you can override those values by specifying them in the client config. 36 | 37 | ```shell 38 | import os 39 | from whyhow import WhyHow 40 | 41 | client = WhyHow(api_key=) 42 | ``` 43 | 44 | ## Option 1 - Create the Knowledge Graph from a schema 45 | 46 | First, you need to define the namespace for your project and specify the paths to your Harry Potter book documents. Your namespace is a logical grouping of the raw data you upload, the schema you define, and the graphs you create. Namespaces are meant to be tightly scoped to your use case. You can create as many namespaces as you want. 47 | 48 | ```shell 49 | namespace = "harry-potter" 50 | documents = [ 51 | "path/to/harry_potter_and_the_philosophers_stone.pdf", 52 | "path/to/harry_potter_and_the_chamber_of_secrets.pdf" 53 | # Add paths to the rest of the Harry Potter series documents 54 | ] 55 | 56 | # Add documents to your namespace 57 | documents_response = client.graph.add_documents(namespace, documents) 58 | print("Documents Added:", documents_response) 59 | 60 | ``` 61 | 62 | Next, you must create a schema which defines the entities, relationships, and patterns you'd like to use to construct the graph. Create this and save it as a JSON file. 63 | 64 | ```shell 65 | 66 | #schema.json 67 | 68 | { 69 | "entities": [ 70 | { 71 | "name": "character", 72 | "description": "A person appearing in the book, e.g., Harry Potter, Ron Weasley, Hermione Granger, Albus Dumbledore." 73 | }, 74 | { 75 | "name": "object", 76 | "description": "Inanimate items that characters use or interact with, e.g., wand, Philosopher's Stone, Invisibility Cloak, broomstick." 77 | } 78 | ], 79 | "relations": [ 80 | { 81 | "name": "friends with", 82 | "description": "Denotes a friendly relationship between characters." 
83 | }, 84 | { 85 | "name": "interacts with", 86 | "description": "Describes a scenario in which a character engages with another character, creature, or object." 87 | } 88 | ], 89 | "patterns": [ 90 | { 91 | "head": "character", 92 | "relation": "friends with", 93 | "tail": "character", 94 | "description": "One character is friends with another, e.g., Harry Potter is friends with Ron Weasley." 95 | }, 96 | { 97 | "head": "character", 98 | "relation": "interacts with", 99 | "tail": "object", 100 | "description": "A character interacting with an object, e.g., Harry Potter interacts with the Invisibility Cloak." 101 | } 102 | ] 103 | } 104 | 105 | ``` 106 | 107 | Then, create the graph using the schema and the uploaded documents: 108 | 109 | ```shell 110 | # Create graph from schema 111 | 112 | schema = "./schema.json" 113 | create_graph_with_schema_response = client.graph.create_graph_from_schema(namespace, schema) 114 | print(create_graph_with_schema_response) 115 | # Creating your graph 116 | 117 | ``` 118 | 119 | ## Option 2 - Create the Knowledge Graph from seed questions 120 | 121 | Alternatively, you can create a graph using seed concepts in the form of questions written in natural language. We'll create a new namespace and upload the same data. 
122 | 123 | ```python 124 | namespace = "harry-potter-2" 125 | documents = [ 126 | "path/to/harry_potter_and_the_philosophers_stone.pdf", 127 | "path/to/harry_potter_and_the_chamber_of_secrets.pdf" 128 | # Add paths to the rest of the Harry Potter series documents 129 | ] 130 | 131 | # Add documents to your namespace 132 | documents_response = client.graph.add_documents(namespace, documents) 133 | print("Documents Added:", documents_response) 134 | 135 | ``` 136 | 137 | Create the knowledge graph from the seed questions and the uploaded documents: 138 | 139 | ```python 140 | questions = ["What does Harry look like?","What does Hermione look like?","What does Ron look like?"] 141 | extracted_graph = client.graph.create_graph(namespace, questions) 142 | print("Extracted Graph:", extracted_graph) 143 | 144 | ``` 145 | 146 | ## Option 3 - Create the Knowledge Graph from CSV 147 | 148 | WhyHow also supports creating a graph from structured data in the form of a CSV file. Note that right now we only support creating a graph from one CSV file per namespace. If you upload more than one file, the first will be overwritten. 149 | 150 | ```python 151 | namespace = "specialists" 152 | documents = ["../examples/assets/specialists.csv"] 153 | schema_file = "../examples/assets/specialists.json" 154 | 155 | # Automatically generate a schema 156 | schema = client.graph.generate_schema(documents=documents) 157 | print(schema) 158 | 159 | # Add documents to your namespace 160 | documents_response = client.graph.add_documents( 161 | namespace=namespace, documents=documents) 162 | 163 | ``` 164 | 165 | You can automatically generate a schema from a CSV document using the `generate_schema` method of the `GraphAPI` class.
166 | 167 | ```python 168 | csv_documents = ["path/to/your/csv/file.csv"] 169 | generated_schema = client.graph.generate_schema(documents=csv_documents) 170 | print(generated_schema) 171 | ``` 172 | 173 | Use the `create_graph_from_csv` function to create a graph from the uploaded CSV file. The function will automatically use the schema provided to generate the graph. 174 | 175 | ```python 176 | csv_graph = client.graph.create_graph_from_csv( 177 | namespace=namespace, schema_file=schema_file 178 | ) 179 | 180 | print(csv_graph) 181 | 182 | ``` 183 | 184 | ## Querying the Knowledge Graph 185 | 186 | With the graphs created, you can now query them to find specific information: 187 | 188 | ```python 189 | # Query the graph created from csv using specific entities and relations 190 | query = "Who speaks English and lives in Houston?" 191 | entities = ["English","Houston"] 192 | relations = ["SPEAKS","LIVE_IN"] 193 | 194 | specific_query_response = client.graph.query_graph_specific( 195 | namespace=namespace, 196 | query=query, 197 | entities=entities, 198 | relations=relations, 199 | include_triples=False, 200 | include_chunks=False, 201 | ) 202 | 203 | print("Specific Query Response:", specific_query_response) 204 | 205 | # Query graph created from schema 206 | query = "Who is Harry friends with?" 207 | namespace = "harry-potter" 208 | schema_query_response = client.graph.query_graph(namespace, query) 209 | print("Query Response:", schema_query_response) 210 | 211 | # Query graph created from seed questions 212 | query = "Who wears a Cloak?" 213 | namespace = "harry-potter-2" 214 | seed_questions_query_response = client.graph.query_graph(namespace, query) 215 | print("Query Response:", seed_questions_query_response) 216 | 217 | # Include the triples in the return 218 | query = "Who is Harry friends with?"
219 | namespace = "harry-potter" 220 | schema_query_response = client.graph.query_graph(namespace, query, include_triples = True) 221 | print("Query Response:", schema_query_response) 222 | 223 | # Include the chunk context in the return 224 | query = "Who is Harry friends with?" 225 | namespace = "harry-potter" 226 | schema_query_response = client.graph.query_graph(namespace, query, include_chunks = True) 227 | print("Query Response:", schema_query_response) 228 | ``` 229 | -------------------------------------------------------------------------------- /examples/assets/specialists.csv: -------------------------------------------------------------------------------- 1 | Name,Gender,Specialization,Subspecialties,Qualifications,Years of Experience,Hospital Affiliation,Department,City,State,Availability,Language 1,Language 2,Language 3,Consultation Fees,Patient Ratings,Insurance Accepted, 2 | Allison Mcmillan,Female,Pediatrics,Sports Medicine,DO,11,Barnes-Wise,Outpatient,New York,NY,"Tuesday, 17:00 - 20:00",English,,,$406.00,3.1,PrimeHealth Assurance 3 | Mr.
Steven Brooks,Male,Gastroenterology,Pediatric,MD,7,Wright-Kim,Outpatient,Los Angeles,CA,"Tuesday, 16:00 - 20:00",English,,,$184.00,1.3,HarmonyHealth Plans 4 | Ann Austin,Female,Psychiatry,Geriatric,MS,23,Lucero PLC,Emergency,Phoenix,AZ,"Tuesday, 9:00 - 18:00",English,French,,$206.00,3.7,GuardianHealth Solutions 5 | Julie Johnson,Female,Dermatology,Sleep Medicine,MS,32,Gill PLC,Outpatient,Houston,TX,"Monday, 8:00 - 18:00",English,,,$273.00,2.8,VitalityCare Health 6 | Christopher Brown,Male,Endocrinology,Addiction Medicine,MD,25,Murphy-Montgomery,Radiology,Chicago,IL,"Thursday, 17:00 - 18:00",English,Hindi,Spanish,$198.00,2.8,OptimumCare Health 7 | Christopher Hamilton,Male,Cardiology,Sleep Medicine,MS,31,Jones PLC,Outpatient,Los Angeles,CA,"Wednesday, 11:00 - 18:00",English,Hindi,Mandarin,$350.00,2.7,LifeShield Insurance 8 | Mark Ward,Male,Pediatrics,Addiction Medicine,MD,25,Meza-Preston,General Medicine,New York,NY,"Wednesday, 15:00 - 18:00",English,Hindi,,$204.00,2.3,EverCare Insurance 9 | Julie Carey,Female,Dermatology,Sleep Medicine,MD,11,Griffin-Herring,Outpatient,Houston,TX,"Monday, 10:00 - 18:00",English,Spanish,Hindi,$438.00,3.8,GuardianHealth Solutions 10 | Douglas Wells,Male,Dermatology,Pediatric,DO,8,Gregory-Gibbs,Outpatient,Phoenix,AZ,"Monday, 11:00 - 19:00",English,French,,$454.00,3.6,LifeShield Insurance 11 | Richard Braun,Male,Dermatology,Sports Medicine,MD,28,Forbes-Walsh,Surgery,Houston,TX,"Tuesday, 14:00 - 19:00",English,Hindi,,$496.00,2.3,HealthFirst Coverage 12 | Stephen Williamson,Male,Neurology,Addiction Medicine,MBBS,37,"Sherman, Mooney and Weber",Radiology,Houston,TX,"Monday, 9:00 - 20:00",English,,,$333.00,1.7,PremierWell Insurance 13 | Michael White,Male,Pediatrics,Addiction Medicine,MD,16,"Sanchez, Roy and Wright",Radiology,Houston,TX,"Friday, 15:00 - 20:00",English,Hindi,Spanish,$205.00,4.7,LifeWell Coverage 14 | Taylor Sanford,Female,Dermatology,Sleep Medicine,MBBS,11,Adams-Brown,Outpatient,Phoenix,AZ,"Wednesday, 16:00 - 
20:00",English,Mandarin,French,$152.00,1.1,HarmonyHealth Plans 15 | Brittney Reyes,Female,Gastroenterology,Sleep Medicine,MD,29,Jennings Group,Surgery,Los Angeles,CA,"Tuesday, 14:00 - 18:00",English,,,$230.00,4.9,EverCare Insurance 16 | Eric Williams,Male,Oncology,Pediatric,MD,27,"Carr, Bush and Bush",General Medicine,Phoenix,AZ,"Thursday, 9:00 - 18:00",English,Hindi,,$170.00,3.5,HealthFirst Coverage 17 | Robert Ramos,Male,Neurology,Sports Medicine,DO,14,Mendoza and Sons,General Medicine,Los Angeles,CA,"Friday, 17:00 - 19:00",English,Hindi,,$349.00,1.9,VitalityCare Health 18 | Michelle Chen,Female,Dermatology,Addiction Medicine,MD,15,"Lee, Burke and Rivers",Emergency,Houston,TX,"Friday, 17:00 - 18:00",English,Spanish,Mandarin,$474.00,1.7,UnityWell Insurance 19 | Taylor Baker,Male,Endocrinology,Geriatric,MS,27,Li-Nelson,Emergency,Houston,TX,"Thursday, 12:00 - 19:00",English,,,$298.00,1.7,SuperiorHealth Plans 20 | Brendan Olson,Male,Dermatology,Geriatric,MS,5,"Torres, Mejia and Moore",Radiology,Los Angeles,CA,"Friday, 17:00 - 19:00",English,Mandarin,,$238.00,2.4,HealthGuard Assurance 21 | David Meadows,Male,Neurology,Addiction Medicine,MD,25,Tran Ltd,General Medicine,Los Angeles,CA,"Wednesday, 8:00 - 19:00",English,,,$401.00,2.7,BrightWell Insurance 22 | Matthew Gentry,Male,Endocrinology,Pediatric,MD,29,Rosario Ltd,Radiology,Houston,TX,"Monday, 15:00 - 18:00",English,,,$452.00,2,SuperiorHealth Plans 23 | Sara Jimenez,Female,Endocrinology,Pediatric,DO,28,Edwards LLC,Radiology,Los Angeles,CA,"Thursday, 17:00 - 20:00",English,,,$115.00,4.2,TrueHealth Insurance 24 | Rebekah Sparks,Female,Urology,Pediatric,MD,30,Jennings-Phillips,Surgery,Houston,TX,"Friday, 13:00 - 18:00",English,,,$371.00,2.4,PureHealth Insurance 25 | Paul Miranda,Male,Psychiatry,Sports Medicine,MD,19,"Reid, Figueroa and Thomas",Emergency,Houston,TX,"Monday, 15:00 - 20:00",English,Mandarin,Spanish,$117.00,1.4,SuperiorHealth Plans, 26 | Kendra Clark,Female,Orthopedics,Pediatric,DO,14,Stevens Inc,General 
Medicine,Los Angeles,CA,"Monday, 15:00 - 20:00",English,French,,$196.00,2.6,HarmonyHealth Plans, 27 | Christina Bell,Female,Psychiatry,Pediatric,DO,38,"Anthony, Martinez and White",Outpatient,Houston,TX,"Friday, 9:00 - 20:00",English,Hindi,Spanish,$291.00,1.8,AdvantageCare Health, 28 | Corey Moore,Male,Pediatrics,Sleep Medicine,MBBS,14,Leonard-Brown,Emergency,New York,NY,"Wednesday, 14:00 - 18:00",English,Spanish,Hindi,$325.00,1.1,TrueHealth Insurance, 29 | Sean Wolf,Male,Dermatology,Geriatric,DO,25,Hicks PLC,Outpatient,Chicago,IL,"Wednesday, 16:00 - 19:00",English,Spanish,French,$425.00,3.3,OptimumCare Health, 30 | Holly Martinez,Female,Pediatrics,Geriatric,DO,9,Solis Group,Outpatient,Phoenix,AZ,"Tuesday, 13:00 - 20:00",English,French,,$404.00,4.1,HealthFirst Coverage, 31 | Karen Mcintyre,Female,Oncology,Addiction Medicine,MD,13,"Murphy, Cochran and Bush",Radiology,Los Angeles,CA,"Monday, 12:00 - 18:00",English,,,$367.00,3.8,PureHealth Insurance, 32 | James Perez,Male,Pediatrics,Sports Medicine,DO,31,Peterson Ltd,Radiology,Chicago,IL,"Thursday, 17:00 - 19:00",English,Hindi,,$354.00,4.8,AdvantageCare Health, 33 | Krystal Luna,Female,Gastroenterology,Sports Medicine,MS,20,Hicks-Santana,Radiology,Chicago,IL,"Friday, 11:00 - 20:00",English,,,$341.00,2.3,HarmonyHealth Plans, 34 | Melissa Gilbert,Female,Gastroenterology,Geriatric,DO,24,Stein-Davis,Outpatient,Los Angeles,CA,"Thursday, 11:00 - 18:00",English,Spanish,Hindi,$213.00,2.9,GuardianHealth Solutions, 35 | Jesse Levine,Male,Orthopedics,Sleep Medicine,MS,17,Jordan LLC,Emergency,Phoenix,AZ,"Tuesday, 13:00 - 19:00",English,,,$204.00,2.3,PremierWell Insurance, 36 | Alex Hill,Male,Cardiology,Sports Medicine,MBBS,15,Ashley PLC,Emergency,Phoenix,AZ,"Thursday, 16:00 - 19:00",English,,,$187.00,3.3,HealthPlus Coverage, 37 | Heather Miller,Female,Endocrinology,Addiction Medicine,MS,32,"Stout, Morales and Clark",General Medicine,New York,NY,"Friday, 8:00 - 20:00",English,,,$352.00,3.2,PremierWell Insurance, 38 | Mathew 
Bauer,Male,Pediatrics,Sports Medicine,MD,17,Berry-Roberson,Surgery,Chicago,IL,"Thursday, 13:00 - 19:00",English,Hindi,,$350.00,3.2,HealthPlus Coverage, 39 | Amber Lee,Female,Endocrinology,Sports Medicine,MS,18,Clay-Wood,Surgery,Houston,TX,"Wednesday, 9:00 - 20:00",English,French,,$275.00,1.2,EliteCare Insurers, 40 | Mallory Ward,Female,Dermatology,Pediatric,MD,13,Nguyen-Burns,General Medicine,New York,NY,"Wednesday, 13:00 - 18:00",English,,,$276.00,4.3,TrueHealth Insurance, 41 | James Lyons,Male,Pediatrics,Sports Medicine,MD,9,Smith-Huang,General Medicine,New York,NY,"Thursday, 11:00 - 18:00",English,Mandarin,Spanish,$242.00,3.9,PrimeHealth Assurance, 42 | Sheryl Walker MD,Female,Psychiatry,Geriatric,MD,34,Thornton-Rivera,Radiology,Houston,TX,"Tuesday, 10:00 - 19:00",English,Hindi,Spanish,$279.00,4,PremierWell Insurance, 43 | Jessica Boone,Female,Urology,Geriatric,MBBS,15,"Bradshaw, Cooley and Robinson",General Medicine,Los Angeles,CA,"Friday, 14:00 - 20:00",English,French,Hindi,$447.00,2.6,EverCare Insurance, 44 | Melissa Robbins,Female,Urology,Sports Medicine,MBBS,28,"Stuart, Craig and Mcguire",Radiology,New York,NY,"Tuesday, 8:00 - 18:00",English,Mandarin,,$459.00,3.2,LifeShield Insurance, 45 | Timothy Wheeler,Male,Pediatrics,Sleep Medicine,MBBS,30,Griffin PLC,Outpatient,New York,NY,"Thursday, 13:00 - 19:00",English,Hindi,,$274.00,3.8,EverCare Insurance, 46 | Casey Scott,Male,Psychiatry,Sleep Medicine,DO,6,Griffith Inc,Outpatient,Chicago,IL,"Friday, 8:00 - 20:00",English,Spanish,,$236.00,3.4,PremierWell Insurance, 47 | Teresa Powers,Female,Urology,Geriatric,DO,19,"Jefferson, Weiss and Foley",Radiology,Phoenix,AZ,"Friday, 16:00 - 20:00",English,French,Hindi,$208.00,3.7,GuardianHealth Solutions, 48 | Kathryn Luna,Female,Cardiology,Sports Medicine,MD,12,"Hughes, Reyes and Flores",Surgery,New York,NY,"Monday, 11:00 - 18:00",English,French,,$455.00,3.6,HealthGuard Assurance, 49 | Sara Jones,Female,Dermatology,Addiction Medicine,MD,12,"Pham, Anderson and 
Bell",Outpatient,New York,NY,"Wednesday, 8:00 - 20:00",English,Hindi,,$146.00,1.7,LifeWell Coverage, 50 | Susan Wilson,Female,Endocrinology,Geriatric,MS,12,Patrick Ltd,Emergency,Chicago,IL,"Tuesday, 15:00 - 19:00",English,Spanish,,$309.00,1.3,LifeWell Coverage, 51 | Brandon Jones,Male,Pediatrics,Addiction Medicine,MBBS,17,"Delacruz, Scott and Collins",Emergency,Los Angeles,CA,"Monday, 17:00 - 19:00",English,,,$246.00,2.6,SecureHealth Plans, 52 | Nicole Hall,Female,Gastroenterology,Sleep Medicine,MBBS,34,Robinson-Patel,Surgery,Chicago,IL,"Tuesday, 17:00 - 20:00",English,Hindi,Spanish,$195.00,4.5,BrightWell Insurance, 53 | Kristin Stephenson,Female,Endocrinology,Sleep Medicine,MS,26,Sandoval-Merritt,Emergency,Chicago,IL,"Monday, 8:00 - 19:00",English,,,$426.00,1.2,EliteCare Insurers, 54 | Deborah Mclaughlin,Female,Dermatology,Geriatric,MBBS,20,Crawford-Mccann,General Medicine,Phoenix,AZ,"Tuesday, 14:00 - 18:00",English,French,Spanish,$496.00,1.6,BrightWell Insurance, 55 | William Barron,Male,Dermatology,Sleep Medicine,MD,20,Olson Ltd,Emergency,Chicago,IL,"Wednesday, 13:00 - 20:00",English,Mandarin,,$477.00,3.7,HealthGuard Assurance, 56 | Jonathon Schwartz,Male,Cardiology,Pediatric,MBBS,29,Flores-Walsh,General Medicine,New York,NY,"Thursday, 16:00 - 18:00",English,French,Spanish,$219.00,1.8,HealthNet Solutions, 57 | Daniel Jenkins,Male,Psychiatry,Pediatric,MD,37,"Phillips, Mahoney and Levy",Outpatient,Chicago,IL,"Wednesday, 9:00 - 20:00",English,,,$468.00,3.4,HealthPlus Coverage, 58 | Sean Holland,Male,Endocrinology,Addiction Medicine,DO,7,"Rodgers, Patel and Powell",Outpatient,Houston,TX,"Tuesday, 12:00 - 19:00",English,Spanish,French,$142.00,4.9,PrimeHealth Assurance, 59 | Angela Whitaker,Female,Gastroenterology,Pediatric,MBBS,10,Carroll Ltd,Outpatient,Los Angeles,CA,"Friday, 9:00 - 19:00",English,Mandarin,,$465.00,4.1,EliteCare Insurers, 60 | Melissa Williams,Female,Gastroenterology,Addiction Medicine,MBBS,37,Campbell Group,Emergency,Los Angeles,CA,"Thursday, 13:00 
- 18:00",English,French,Spanish,$357.00,2.3,VitalityCare Health, 61 | Denise Sheppard,Female,Psychiatry,Pediatric,MD,9,Norman and Sons,Outpatient,Los Angeles,CA,"Friday, 15:00 - 20:00",English,Mandarin,,$441.00,3.5,HealthGuard Assurance, 62 | Seth Ford,Male,Gastroenterology,Addiction Medicine,MS,36,"Carter, Torres and Bates",General Medicine,New York,NY,"Monday, 13:00 - 19:00",English,,,$348.00,3.3,AdvantageCare Health, 63 | Mary Young,Female,Pediatrics,Sleep Medicine,MD,35,"Garcia, Good and Ortiz",Emergency,Chicago,IL,"Thursday, 10:00 - 18:00",English,,,$167.00,2.3,SuperiorHealth Plans, 64 | Bryan Horne,Male,Psychiatry,Sports Medicine,MD,25,Acevedo-Taylor,General Medicine,Chicago,IL,"Friday, 11:00 - 19:00",English,,,$497.00,4.3,VitalityCare Health, 65 | Linda Vasquez,Female,Orthopedics,Addiction Medicine,MD,10,Colon PLC,Outpatient,Los Angeles,CA,"Thursday, 14:00 - 19:00",English,,,$252.00,3.8,LifeShield Insurance, 66 | Troy Booth,Male,Gastroenterology,Geriatric,MD,12,Thompson-Austin,Surgery,Phoenix,AZ,"Wednesday, 8:00 - 18:00",English,Spanish,,$353.00,4.3,LifeWell Coverage, 67 | Samantha Miller,Female,Cardiology,Addiction Medicine,DO,30,Yu-Weber,Outpatient,Chicago,IL,"Tuesday, 9:00 - 19:00",English,French,Mandarin,$164.00,4.3,HealthNet Solutions, 68 | Carolyn Berg,Female,Endocrinology,Addiction Medicine,MS,31,"Pena, Carter and Bowen",Surgery,Chicago,IL,"Wednesday, 9:00 - 19:00",English,Mandarin,Spanish,$331.00,5,EverCare Insurance, 69 | Kristen Bailey,Female,Oncology,Sports Medicine,MBBS,18,Parker-Young,General Medicine,Los Angeles,CA,"Tuesday, 8:00 - 18:00",English,,,$199.00,3.5,OptimumCare Health, 70 | Shelby Jackson,Female,Pediatrics,Addiction Medicine,MD,22,Webb Ltd,Emergency,Phoenix,AZ,"Thursday, 11:00 - 19:00",English,Spanish,,$331.00,3.9,TrueHealth Insurance, 71 | Edward Wright,Male,Orthopedics,Sports Medicine,MBBS,25,"Martin, Johnson and Kidd",General Medicine,New York,NY,"Friday, 15:00 - 20:00",English,Spanish,Mandarin,$212.00,3.9,SuperiorHealth Plans, 72 
| Tara Webb,Female,Pediatrics,Sports Medicine,DO,29,Wright-Martinez,Radiology,Los Angeles,CA,"Thursday, 13:00 - 20:00",English,,,$471.00,2,PremierWell Insurance, 73 | Connie Rogers,Female,Urology,Pediatric,DO,11,Haas Ltd,Radiology,Phoenix,AZ,"Wednesday, 8:00 - 19:00",English,Hindi,,$496.00,2.5,HealthFirst Coverage, 74 | Jason Acosta,Male,Orthopedics,Geriatric,MS,16,"Price, Thomas and Welch",General Medicine,Los Angeles,CA,"Monday, 17:00 - 18:00",English,Spanish,,$429.00,3.3,OptimumCare Health, 75 | Danielle Jackson,Female,Gastroenterology,Addiction Medicine,MD,10,"Graham, Martinez and Gonzales",Surgery,Houston,TX,"Monday, 16:00 - 20:00",English,Mandarin,Hindi,$433.00,3.7,GuardianHealth Solutions, 76 | Amanda Miller DVM,Female,Dermatology,Sleep Medicine,MBBS,32,Garcia Ltd,Outpatient,Phoenix,AZ,"Thursday, 8:00 - 20:00",English,Hindi,Spanish,$231.00,3.5,PremierWell Insurance, 77 | Mr. Christopher Miller MD,Male,Psychiatry,Sports Medicine,DO,21,Jackson-Cummings,Emergency,Houston,TX,"Wednesday, 17:00 - 18:00",English,Mandarin,,$451.00,2.5,PrimeHealth Assurance, 78 | Kevin Mason,Male,Neurology,Pediatric,DO,32,Brown-Miller,Outpatient,Phoenix,AZ,"Wednesday, 15:00 - 18:00",English,Mandarin,,$261.00,4.3,TotalCare Coverage, 79 | Rebecca Simpson,Female,Neurology,Sports Medicine,MBBS,28,"Baldwin, Harris and Spencer",Surgery,Houston,TX,"Tuesday, 15:00 - 19:00",English,Spanish,,$129.00,4.3,UnityWell Insurance, 80 | Thomas Kelly,Male,Oncology,Addiction Medicine,MBBS,36,Howell LLC,Radiology,Los Angeles,CA,"Friday, 8:00 - 20:00",English,Spanish,Hindi,$160.00,2.6,GuardianHealth Solutions, 81 | Andrew Thomas,Male,Gastroenterology,Pediatric,MBBS,8,"Williams, Cox and Taylor",Emergency,Chicago,IL,"Thursday, 10:00 - 19:00",English,Hindi,French,$330.00,1.3,GuardianHealth Solutions, 82 | Susan Rodriguez,Female,Gastroenterology,Addiction Medicine,MS,7,Kane-Phillips,Emergency,Los Angeles,CA,"Friday, 10:00 - 20:00",English,Mandarin,Spanish,$277.00,4.2,PureHealth Insurance, 83 | Angela 
Payne,Female,Gastroenterology,Sleep Medicine,MD,38,Evans LLC,Emergency,Los Angeles,CA,"Wednesday, 17:00 - 18:00",English,French,,$419.00,1.5,HealthNet Solutions, 84 | Kristie Potter,Female,Endocrinology,Geriatric,MD,25,Murphy LLC,Emergency,New York,NY,"Thursday, 12:00 - 18:00",English,Mandarin,,$235.00,4.7,HarmonyHealth Plans, 85 | Sarah Hudson,Female,Oncology,Addiction Medicine,MBBS,20,"Porter, Barrera and Snyder",Surgery,Phoenix,AZ,"Thursday, 12:00 - 19:00",English,,,$403.00,4.8,TrueHealth Insurance, 86 | Sue Whitaker,Female,Neurology,Addiction Medicine,MD,6,Kennedy-Newman,General Medicine,Chicago,IL,"Wednesday, 12:00 - 20:00",English,,,$440.00,1.5,SecureHealth Plans, 87 | Cody Chavez,Male,Neurology,Pediatric,DO,35,Moore-James,Surgery,New York,NY,"Tuesday, 12:00 - 20:00",English,Spanish,,$112.00,4.9,HarmonyHealth Plans, 88 | Thomas Smith,Male,Orthopedics,Geriatric,MD,31,Browning-Smith,General Medicine,Phoenix,AZ,"Tuesday, 12:00 - 20:00",English,Mandarin,,$104.00,2.1,LifeShield Insurance, 89 | Samuel Moreno,Male,Psychiatry,Sleep Medicine,DO,29,"Velasquez, Zuniga and Reynolds",Emergency,Houston,TX,"Thursday, 11:00 - 19:00",English,Spanish,Mandarin,$324.00,2.2,LifeWell Coverage, 90 | Elizabeth Bright,Female,Orthopedics,Sleep Medicine,MD,35,"Webb, Wong and Nichols",Emergency,New York,NY,"Thursday, 11:00 - 20:00",English,,,$102.00,3,VitalityCare Health, 91 | William Williams,Male,Pediatrics,Sports Medicine,MBBS,21,Rangel-Ferrell,Emergency,Phoenix,AZ,"Monday, 11:00 - 20:00",English,Hindi,French,$289.00,2.8,TotalCare Coverage, 92 | Christopher Huff,Male,Pediatrics,Addiction Medicine,MD,37,Smith and Sons,Emergency,Houston,TX,"Thursday, 17:00 - 18:00",English,Hindi,,$117.00,3.5,HealthFirst Coverage, 93 | Richard Foster,Male,Cardiology,Sleep Medicine,DO,39,"Rodriguez, Thompson and Marquez",Surgery,Phoenix,AZ,"Monday, 12:00 - 19:00",English,French,Hindi,$479.00,1.2,UnityWell Insurance, 94 | Sharon Gutierrez,Female,Urology,Addiction Medicine,MS,9,"Foster, Martin and 
Horn",General Medicine,Houston,TX,"Friday, 13:00 - 19:00",English,,,$189.00,1.4,EverCare Insurance, 95 | Melissa Martin,Female,Dermatology,Sleep Medicine,MS,10,Walsh-Williams,General Medicine,Houston,TX,"Thursday, 15:00 - 19:00",English,French,,$244.00,4,GuardianHealth Solutions, 96 | Dean Curtis,Male,Dermatology,Pediatric,MS,14,"Soto, Rodriguez and Stanley",Surgery,Los Angeles,CA,"Thursday, 16:00 - 18:00",English,,,$121.00,2.4,EverCare Insurance, 97 | Jeremy King,Male,Pediatrics,Sleep Medicine,DO,17,Lewis-Reese,Outpatient,Phoenix,AZ,"Friday, 9:00 - 20:00",English,Mandarin,,$195.00,4,HealthGuard Assurance, 98 | Paul Fisher,Male,Orthopedics,Geriatric,MBBS,31,"King, Adams and Mccormick",General Medicine,Chicago,IL,"Thursday, 14:00 - 19:00",English,Spanish,French,$458.00,2.9,HealthGuard Assurance, 99 | Lori Vaughan,Female,Pediatrics,Sports Medicine,MD,24,Hunter-Chavez,Surgery,Houston,TX,"Tuesday, 12:00 - 19:00",English,Mandarin,Spanish,$472.00,2.8,UnityWell Insurance, 100 | Rhonda Evans,Female,Neurology,Sports Medicine,DO,29,Weaver-Clark,Surgery,Phoenix,AZ,"Wednesday, 12:00 - 18:00",English,Spanish,,$296.00,2.2,ProHealth Assurance, 101 | Jeremy Henson,Male,Oncology,Sleep Medicine,MD,39,Morrison-Ross,General Medicine,Chicago,IL,"Tuesday, 13:00 - 20:00",English,Mandarin,,$333.00,4.1,HealthFirst Coverage, -------------------------------------------------------------------------------- /examples/assets/specialists.json: -------------------------------------------------------------------------------- 1 | { 2 | "entities": [ 3 | { 4 | "name": "Name", 5 | "set_type_as": "", 6 | "property_columns": [ 7 | "Gender", 8 | "Qualifications", 9 | "Years of Experience", 10 | "Availability", 11 | "Consultation Fees", 12 | "Patient Ratings", 13 | "Insurance Accepted" 14 | ], 15 | "description": "" 16 | }, 17 | { 18 | "name": "Specialization", 19 | "set_type_as": "Specialty", 20 | "property_columns": [], 21 | "description": "" 22 | }, 23 | { 24 | "name": "Subspecialties", 25 | 
"set_type_as": "Specialty", 26 | "property_columns": [], 27 | "description": "" 28 | }, 29 | { 30 | "name": "Hospital Affiliation", 31 | "set_type_as": "Hospital", 32 | "property_columns": [], 33 | "description": "" 34 | }, 35 | { 36 | "name": "City", 37 | "set_type_as": "", 38 | "property_columns": [], 39 | "description": "" 40 | }, 41 | { 42 | "name": "Language 1", 43 | "set_type_as": "Language", 44 | "property_columns": [], 45 | "description": "" 46 | }, 47 | { 48 | "name": "Language 2", 49 | "set_type_as": "Language", 50 | "property_columns": [], 51 | "description": "" 52 | }, 53 | { 54 | "name": "Language 3", 55 | "set_type_as": "Language", 56 | "property_columns": [], 57 | "description": "" 58 | } 59 | ], 60 | "patterns": [ 61 | { 62 | "head": "Name", 63 | "relation": "specializes_in", 64 | "tail": "Specialization", 65 | "description": "" 66 | }, 67 | { 68 | "head": "Name", 69 | "relation": "specializes_in", 70 | "tail": "Subspecialties", 71 | "description": "" 72 | }, 73 | { 74 | "head": "Name", 75 | "relation": "works_at", 76 | "tail": "Hospital Affiliation", 77 | "description": "" 78 | }, 79 | { 80 | "head": "Name", 81 | "relation": "live_in", 82 | "tail": "City", 83 | "description": "" 84 | }, 85 | { 86 | "head": "Name", 87 | "relation": "speaks", 88 | "tail": "Language 1", 89 | "description": "" 90 | }, 91 | { 92 | "head": "Name", 93 | "relation": "speaks", 94 | "tail": "Language 2", 95 | "description": "" 96 | }, 97 | { 98 | "head": "Name", 99 | "relation": "speaks", 100 | "tail": "Language 3", 101 | "description": "" 102 | } 103 | ] 104 | } 105 | -------------------------------------------------------------------------------- /examples/create_graph_from_csv.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Create a knowledge graph with a CSV and a preset schema" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | 
"execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "# Import the required libraries\n", 17 | "\n", 18 | "import os\n", 19 | "import json\n", 20 | "\n", 21 | "from dotenv import load_dotenv\n", 22 | "\n", 23 | "load_dotenv()" 24 | ] 25 | }, 26 | { 27 | "cell_type": "code", 28 | "execution_count": null, 29 | "metadata": {}, 30 | "outputs": [], 31 | "source": [ 32 | "# Import whyhow\n", 33 | "from whyhow import WhyHow" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "# Initialize the whyhow client\n", 43 | "client = WhyHow(\n", 44 | " api_key = os.environ.get(\"WHYHOW_API_KEY\"),\n", 45 | " openai_api_key=os.getenv(\"OPENAI_API_KEY\"),\n", 46 | " pinecone_api_key=os.getenv(\"PINECONE_API_KEY\"),\n", 47 | " neo4j_url=os.getenv(\"NEO4J_URI\"),\n", 48 | " neo4j_user=os.getenv(\"NEO4J_USER\"),\n", 49 | " neo4j_password=os.getenv(\"NEO4J_PASSWORD\"),\n", 50 | ")" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": null, 56 | "metadata": {}, 57 | "outputs": [], 58 | "source": [ 59 | "namespace = \"whyhow_csv\"\n", 60 | "documents = [\"../examples/assets/specialists.csv\"]\n", 61 | "schema_file = \"../examples/assets/specialists.json\"" 62 | ] 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "metadata": {}, 67 | "source": [ 68 | "### Generate a schema\n", 69 | "With CSVs, you can automatically generate a schema for your document. The schema will automatically include all column names. 
You can modify the output to make the schema your own " 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "# Note, you can only generate a schema from a csv file and only one csv file at a time\n", 79 | "\n", 80 | "schema = client.graph.generate_schema(documents=documents)\n", 81 | "print(schema)" 82 | ] 83 | }, 84 | { 85 | "cell_type": "markdown", 86 | "metadata": {}, 87 | "source": [ 88 | "### Add documents to namespace\n", 89 | "Upload one CSV file to the namespace. In this version, you can only upload one CSV file to a namespace so as not to confuse the schema." 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "# Add csv\n", 99 | "documents_response = client.graph.add_documents(\n", 100 | " namespace=namespace, documents=documents)\n" 101 | ] 102 | }, 103 | { 104 | "cell_type": "markdown", 105 | "metadata": {}, 106 | "source": [ 107 | "### Create graph from CSV\n", 108 | "Use the `create_graph_from_csv` function to create a graph from the uploaded CSV file. 
The function will automatically use the schema provided to generate the graph" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": null, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "csv_graph = client.graph.create_graph_from_csv(\n", 118 | " namespace=namespace, schema_file=schema_file\n", 119 | ")\n", 120 | "\n", 121 | "print(csv_graph)" 122 | ] 123 | }, 124 | { 125 | "cell_type": "markdown", 126 | "metadata": {}, 127 | "source": [ 128 | "### Query the graph" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": {}, 135 | "outputs": [], 136 | "source": [ 137 | "# Query the graph using a natural language query\n", 138 | "query = \"Who speaks English and lives in Houston?\"\n", 139 | "\n", 140 | "query_response = client.graph.query_graph(\n", 141 | " namespace=namespace, query=query, include_triples=True)\n", 142 | "\n", 143 | "print(query_response)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "### Query the graph with specific entities and relations" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": null, 156 | "metadata": {}, 157 | "outputs": [], 158 | "source": [ 159 | "# Query the graph using specific entities and relations\n", 160 | "query = \"Who speaks English and live in Houston?\"\n", 161 | "entities = [\"English\",\"Houston\"]\n", 162 | "relations = [\"SPEAKS\", \"LIVE_IN\"]\n", 163 | "\n", 164 | "specific_query_response = client.graph.query_graph_specific(\n", 165 | " namespace=namespace, \n", 166 | " query=query, \n", 167 | " entities=entities, \n", 168 | " relations=relations,\n", 169 | " include_triples=False,\n", 170 | " include_chunks=False,\n", 171 | ")\n", 172 | "\n", 173 | "print(specific_query_response)" 174 | ] 175 | } 176 | ], 177 | "metadata": { 178 | "kernelspec": { 179 | "display_name": "venv", 180 | "language": "python", 181 | "name": "python3" 182 | }, 183 | 
"language_info": { 184 | "codemirror_mode": { 185 | "name": "ipython", 186 | "version": 3 187 | }, 188 | "file_extension": ".py", 189 | "mimetype": "text/x-python", 190 | "name": "python", 191 | "nbconvert_exporter": "python", 192 | "pygments_lexer": "ipython3", 193 | "version": "3.11.5" 194 | } 195 | }, 196 | "nbformat": 4, 197 | "nbformat_minor": 2 198 | } 199 | -------------------------------------------------------------------------------- /examples/create_graph_from_questions.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Create a knowledge graph with questions" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import os\n", 17 | "\n", 18 | "from dotenv import load_dotenv\n", 19 | "\n", 20 | "load_dotenv()" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "from whyhow import WhyHow" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "client = WhyHow(\n", 39 | " api_key = os.environ.get(\"WHYHOW_API_KEY\"),\n", 40 | " openai_api_key=os.getenv(\"OPENAI_API_KEY\"),\n", 41 | " pinecone_api_key=os.getenv(\"PINECONE_API_KEY\"),\n", 42 | " neo4j_url=os.getenv(\"NEO4J_URI\"),\n", 43 | " neo4j_user=os.getenv(\"NEO4J_USER\"),\n", 44 | " neo4j_password=os.getenv(\"NEO4J_PASSWORD\"),\n", 45 | ")" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "namespace = \"\"\n", 55 | "documents = []\n", 56 | "questions = []" 57 | ] 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "metadata": {}, 62 | "source": [ 63 | "### Add documents to database" 64 | ] 65 | }, 66 | { 67 | "cell_type": 
"code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "# Add documents\n", 73 | "documents_response = client.graph.add_documents(namespace, documents)\n", 74 | "print(documents_response)" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "metadata": {}, 80 | "source": [ 81 | "### Create the graph" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": null, 87 | "metadata": {}, 88 | "outputs": [], 89 | "source": [ 90 | "# Create a graph\n", 91 | "extracted_graph = client.graph.create_graph(\n", 92 | " namespace = namespace, \n", 93 | " questions = questions\n", 94 | ")\n", 95 | "\n", 96 | "print(extracted_graph)" 97 | ] 98 | }, 99 | { 100 | "cell_type": "markdown", 101 | "metadata": {}, 102 | "source": [ 103 | "### Query the graph" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": null, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "# Query the graph\n", 113 | "query = \"\"\n", 114 | "query_response = client.graph.query_graph(\n", 115 | " namespace = namespace, \n", 116 | " query = query\n", 117 | ")\n", 118 | "\n", 119 | "print(query_response)" 120 | ] 121 | } 122 | ], 123 | "metadata": { 124 | "kernelspec": { 125 | "display_name": "venv", 126 | "language": "python", 127 | "name": "python3" 128 | }, 129 | "language_info": { 130 | "codemirror_mode": { 131 | "name": "ipython", 132 | "version": 3 133 | }, 134 | "file_extension": ".py", 135 | "mimetype": "text/x-python", 136 | "name": "python", 137 | "nbconvert_exporter": "python", 138 | "pygments_lexer": "ipython3", 139 | "version": "3.10.13" 140 | } 141 | }, 142 | "nbformat": 4, 143 | "nbformat_minor": 2 144 | } 145 | -------------------------------------------------------------------------------- /examples/create_graph_from_schema.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | 
"source": [ 7 | "# Create a knowledge graph with a preset schema" 8 | ] 9 | }, 10 | { 11 | "cell_type": "code", 12 | "execution_count": null, 13 | "metadata": {}, 14 | "outputs": [], 15 | "source": [ 16 | "import os\n", 17 | "import json\n", 18 | "\n", 19 | "from dotenv import load_dotenv\n", 20 | "\n", 21 | "load_dotenv()" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "from whyhow import WhyHow" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "client = WhyHow(\n", 40 | " api_key = os.environ.get(\"WHYHOW_API_KEY\"),\n", 41 | " openai_api_key=os.getenv(\"OPENAI_API_KEY\"),\n", 42 | " pinecone_api_key=os.getenv(\"PINECONE_API_KEY\"),\n", 43 | " neo4j_url=os.getenv(\"NEO4J_URI\"),\n", 44 | " neo4j_user=os.getenv(\"NEO4J_USER\"),\n", 45 | " neo4j_password=os.getenv(\"NEO4J_PASSWORD\"),\n", 46 | ")" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "documents = []\n", 56 | "namespace = \"\"\n", 57 | "schema_file = \"\"" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "metadata": {}, 63 | "source": [ 64 | "### Add documents to database" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "# Add documents\n", 74 | "documents_response = client.graph.add_documents(\n", 75 | " namespace=namespace, documents=documents)\n", 76 | "\n", 77 | "print(documents_response)" 78 | ] 79 | }, 80 | { 81 | "cell_type": "markdown", 82 | "metadata": {}, 83 | "source": [ 84 | "### Create the graph" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "# Create a graph\n", 94 | "extracted_graph = 
client.graph.create_graph_from_schema(\n", 95 | " namespace=namespace, schema_file=schema_file\n", 96 | ")\n", 97 | "\n", 98 | "print(extracted_graph)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "markdown", 103 | "metadata": {}, 104 | "source": [ 105 | "### Query the graph" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "execution_count": null, 111 | "metadata": {}, 112 | "outputs": [], 113 | "source": [ 114 | "# Query the graph\n", 115 | "query = \"\"\n", 116 | "\n", 117 | "entities = [\"\"]\n", 118 | "relations = [\"\"]\n", 119 | "\n", 120 | "query_response = client.graph.query_graph_specific(\n", 121 | " namespace=namespace, \n", 122 | " query=query,\n", 123 | " entities=entities,\n", 124 | " relations=relations,\n", 125 | " include_triples=False,\n", 126 | " include_chunks=False\n", 127 | ")\n", 128 | "\n", 129 | "print(query_response.answer)" 130 | ] 131 | } 132 | ], 133 | "metadata": { 134 | "kernelspec": { 135 | "display_name": "venv", 136 | "language": "python", 137 | "name": "python3" 138 | }, 139 | "language_info": { 140 | "codemirror_mode": { 141 | "name": "ipython", 142 | "version": 3 143 | }, 144 | "file_extension": ".py", 145 | "mimetype": "text/x-python", 146 | "name": "python", 147 | "nbconvert_exporter": "python", 148 | "pygments_lexer": "ipython3", 149 | "version": "3.10.13" 150 | } 151 | }, 152 | "nbformat": 4, 153 | "nbformat_minor": 2 154 | } 155 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: WhyHow SDK 2 | nav: 3 | - Home: index.md 4 | - Installation: installation.md 5 | - Tutorial: tutorial.md 6 | - API Documentation: api.md 7 | 8 | theme: 9 | name: material 10 | palette: 11 | scheme: slate 12 | features: 13 | - content.code.copy 14 | - search.suggest 15 | - search.highlight 16 | - toc.follow 17 | 18 | plugins: 19 | - search 20 | - mkdocstrings: 21 | handlers: 22 | python: 23 | options: 24
| docstring_style: numpy 25 | show_root_heading: true 26 | 27 | markdown_extensions: 28 | - toc: 29 | permalink: true 30 | toc_depth: 3 31 | - admonition 32 | - tables 33 | - pymdownx.details 34 | - pymdownx.highlight: 35 | anchor_linenums: true 36 | line_spans: __span 37 | pygments_lang_class: true 38 | - pymdownx.inlinehilite 39 | - pymdownx.snippets 40 | - pymdownx.superfences 41 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.2"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "whyhow" 7 | authors = [ 8 | {name = "Tom Smoker", email = "tom@whyhow.ai"}, 9 | {name = "Chris Rec", email = "chris@whyhow.ai"} 10 | ] 11 | description = "Whyhow automated KG SDK" 12 | keywords = ["SDK", "KG"] 13 | classifiers = ["Programming Language :: Python :: 3"] 14 | requires-python = ">=3.10" 15 | dependencies = [ 16 | "httpx", 17 | "pydantic>1", 18 | ] 19 | dynamic = ["version"] 20 | readme = "README.md" 21 | license = {text = "MIT"} 22 | 23 | [project.optional-dependencies] 24 | dev = [ 25 | "bandit[toml]", 26 | "black", 27 | "flake8", 28 | "flake8-docstrings", 29 | "fpdf", 30 | "isort", 31 | "mypy", 32 | "pydocstyle[toml]", 33 | "pytest-asyncio", 34 | "pytest-cov", 35 | "pytest-httpx", 36 | "pytest", 37 | ] 38 | docs = [ 39 | "mkdocs", 40 | "mkdocstrings[python]", 41 | "mkdocs-material", 42 | "pymdown-extensions", 43 | ] 44 | 45 | [project.urls] 46 | Homepage = "https://github.com/whyhow-ai/whyhow" 47 | 48 | [project.scripts] 49 | 50 | [tool.setuptools] 51 | zip-safe = false 52 | include-package-data = true 53 | package-dir = {"" = "src"} 54 | 55 | [tool.setuptools.packages.find] 56 | where = ["src"] 57 | namespaces = false 58 | 59 | [tool.setuptools.package-data] 60 | "*" = ["*.txt", "*.rst", "*.typed"] 61 | 62 | [tool.setuptools.dynamic] 63 | version = {attr = 
"whyhow.__version__"} 64 | 65 | [tool.pydocstyle] 66 | convention = "numpy" 67 | add-ignore = "D301" 68 | 69 | [tool.bandit] 70 | 71 | [tool.black] 72 | line-length = 79 73 | preview = true 74 | 75 | [tool.isort] 76 | profile = "black" 77 | line_length = 79 78 | 79 | [tool.mypy] 80 | plugins = [ 81 | "pydantic.mypy" 82 | ] 83 | python_version = "3.10" 84 | ignore_missing_imports = true 85 | no_implicit_optional = true 86 | check_untyped_defs = true 87 | strict_equality = true 88 | warn_redundant_casts = true 89 | warn_unused_ignores = true 90 | show_error_codes = true 91 | disallow_any_generics = true 92 | disallow_incomplete_defs = true 93 | disallow_untyped_defs = true 94 | 95 | [tool.pydantic-mypy] 96 | init_forbid_extra = true 97 | init_typed = true 98 | warn_required_dynamic_aliases = true 99 | 100 | [tool.pytest.ini_options] 101 | filterwarnings = [ 102 | "error", 103 | "ignore:There is no current event loop", 104 | ] 105 | testpaths = [ 106 | "tests", 107 | ] 108 | addopts = "--cov=src/ -v --cov-report=term-missing --durations=20" 109 | log_cli = false 110 | -------------------------------------------------------------------------------- /src/whyhow/__init__.py: -------------------------------------------------------------------------------- 1 | """WhyHow SDK.""" 2 | 3 | from whyhow.client import AsyncWhyHow, WhyHow 4 | 5 | __version__ = "v0.0.7" 6 | __all__ = ["AsyncWhyHow", "WhyHow"] 7 | -------------------------------------------------------------------------------- /src/whyhow/apis/__init__.py: -------------------------------------------------------------------------------- 1 | """Actual implementation of sending requests.""" 2 | -------------------------------------------------------------------------------- /src/whyhow/apis/base.py: -------------------------------------------------------------------------------- 1 | """Base classes for API schemas.""" 2 | 3 | from abc import ABC 4 | 5 | from httpx import AsyncClient, Client 6 | from pydantic import 
class GraphAPI(APIBase):
    """Interacting with the graph API synchronously.

    Every request is sent through ``self.client`` to
    ``{self.prefix}/{namespace}/<endpoint>``. ``raise_for_status`` is called
    on each response, so an error status from the API surfaces as an
    exception raised by the HTTP client.
    """

    def add_documents(self, namespace: str, documents: list[str]) -> str:
        """Add documents to the graph.

        Parameters
        ----------
        namespace : str
            The namespace of the graph.

        documents : list[str]
            Paths of the local files to upload. Only ``.pdf`` and ``.csv``
            files are accepted: at most 3 files in total, and only a single
            file when a CSV is involved.

        Returns
        -------
        str
            The ``message`` field of the API response.

        Raises
        ------
        ValueError
            If no documents are given, a path does not exist, a file has an
            unsupported suffix, the combined size exceeds the upload limit,
            or too many files are given.
        """
        if not documents:
            raise ValueError("No documents provided")

        document_paths = [Path(document) for document in documents]
        if not all(path.exists() for path in document_paths):
            raise ValueError("Not all documents exist")

        if not all(
            path.suffix in [".pdf", ".csv"] for path in document_paths
        ):
            raise ValueError("Only PDFs and CSVs are supported")

        total_size = sum(os.path.getsize(path) for path in document_paths)
        if total_size > 8388600:
            raise ValueError(
                "PDFs too large, please limit your total upload size to <8MB."
            )

        has_csv = any(path.suffix == ".csv" for path in document_paths)
        if has_csv and len(document_paths) > 1:
            raise ValueError(
                "Too many documents. "
                "Please limit CSV uploads to 1 file during the beta."
            )

        if len(document_paths) > 3:
            raise ValueError(
                "Too many documents. "
                "Please limit PDF uploads to 3 files during the beta."
            )

        # Keep track of every handle we open so that all of them are closed
        # again, even when opening a later file or the request itself fails.
        handles = []
        try:
            for path in document_paths:
                handles.append(open(path, "rb"))

            files = [
                ("documents", (path.name, handle))
                for path, handle in zip(document_paths, handles)
            ]

            raw_response = self.client.post(
                f"{self.prefix}/{namespace}/add_documents",
                files=files,
            )
        finally:
            for handle in handles:
                handle.close()

        raw_response.raise_for_status()

        response = AddDocumentsResponse.model_validate(raw_response.json())

        return response.message

    def generate_schema(self, documents: list[str]) -> str:
        """Generate a schema from a CSV document.

        Only the first row of the CSV (the header) is read. Every column
        becomes an entity, and every column after the first becomes a
        ``has_<column>`` pattern whose head is the first column.

        Parameters
        ----------
        documents : list[str]
            A list holding the path of exactly one ``.csv`` file.

        Returns
        -------
        str
            A JSON document with the keys ``entities`` and ``patterns``.

        Raises
        ------
        ValueError
            If no documents are given, a path does not exist, a file is not
            a CSV, or more than one file is given.
        """
        if not documents:
            raise ValueError("No documents provided")

        document_paths = [Path(document) for document in documents]
        if not all(path.exists() for path in document_paths):
            raise ValueError("Not all documents exist")

        if not all(path.suffix in [".csv"] for path in document_paths):
            raise ValueError(
                "Only CSVs are supported "
                "for local schema generation right now."
            )

        if len(document_paths) > 1:
            raise ValueError(
                "Too many documents, "
                "can only generate schema for one document at a time."
            )

        # utf-8-sig strips a BOM so it does not end up in the first column.
        with open(document_paths[0], newline="", encoding="utf-8-sig") as f:
            header = next(csv.reader(f), None)

        columns = header if header is not None else []

        patterns = [
            {
                "head": columns[0],
                "relation": f"has_{column.lower().replace(' ', '_')}",
                "tail": column,
                "description": "",
            }
            for column in columns[1:]
        ]
        entities = [
            {
                "name": column,
                "set_type_as": "",
                "property_columns": [],
                "description": "",
            }
            for column in columns
        ]

        return json.dumps(
            {"entities": entities, "patterns": patterns}, indent=4
        )

    def create_graph(self, namespace: str, questions: list[str]) -> str:
        """Create a new graph.

        Parameters
        ----------
        namespace : str
            The namespace of the graph to create.
        questions : list[str]
            The seed concepts to initialize the graph with.

        Returns
        -------
        str
            The ``message`` field of the API response.

        Raises
        ------
        ValueError
            If no questions are given.
        """
        if not questions:
            raise ValueError("No questions provided")

        request_body = CreateQuestionGraphRequest(questions=questions)

        raw_response = self.client.post(
            f"{self.prefix}/{namespace}/create_graph",
            json=request_body.model_dump(),
        )

        raw_response.raise_for_status()

        response = CreateGraphResponse.model_validate(raw_response.json())

        return response.message

    def create_graph_from_schema(
        self, namespace: str, schema_file: str
    ) -> str:
        """Create a new graph based on a user-defined schema.

        Parameters
        ----------
        namespace : str
            The namespace of the graph to create.
        schema_file : str
            Path of the JSON schema file to use to build the graph.

        Returns
        -------
        str
            The ``message`` field of the API response.

        Raises
        ------
        ValueError
            If no schema file is given.
        """
        if not schema_file:
            raise ValueError("No schema provided")

        # Same explicit encoding as create_graph_from_csv, so the result does
        # not depend on the platform default and a BOM is tolerated.
        with open(schema_file, "r", encoding="utf-8-sig") as file:
            schema_data = json.load(file)

        schema_model = SchemaModel(**schema_data)

        request_body = CreateSchemaGraphRequest(graph_schema=schema_model)

        raw_response = self.client.post(
            f"{self.prefix}/{namespace}/create_graph_from_schema",
            json=request_body.model_dump(),
        )

        raw_response.raise_for_status()

        response = CreateGraphResponse.model_validate(raw_response.json())

        return response.message

    def create_graph_from_csv(self, namespace: str, schema_file: str) -> str:
        """Create a new graph using a CSV based on a user-defined schema.

        Parameters
        ----------
        namespace : str
            The namespace of the graph to create.
        schema_file : str
            Path of the JSON schema file to use to build the graph.

        Returns
        -------
        str
            The ``message`` field of the API response.

        Raises
        ------
        ValueError
            If no schema file is given, or an entity lists ``name`` or
            ``namespace`` (case-insensitive) in its ``property_columns``.
        """
        if not schema_file:
            raise ValueError("No schema provided")

        with open(schema_file, "r", encoding="utf-8-sig") as file:
            schema_data = json.load(file)

        # "entities" and "property_columns" are optional in the schema model
        # (the latter may also be null), so treat absent values as empty.
        for entity in schema_data.get("entities") or []:
            for column in entity.get("property_columns") or []:
                if column.lower() in ["name", "namespace"]:
                    raise ValueError(
                        "The values 'name' and 'namespace' "
                        "are not allowed in property_columns. "
                        f"Found '{column}'."
                    )

        schema_model = SchemaModel(**schema_data)

        request_body = CreateSchemaGraphRequest(graph_schema=schema_model)

        raw_response = self.client.post(
            f"{self.prefix}/{namespace}/create_graph_from_csv",
            json=request_body.model_dump(),
        )

        raw_response.raise_for_status()

        response = CreateGraphResponse.model_validate(raw_response.json())

        return response.message

    def query_graph(
        self,
        namespace: str,
        query: str,
        include_triples: bool = False,
        include_chunks: bool = False,
    ) -> QueryGraphResponse:
        """Query the graph.

        Parameters
        ----------
        namespace : str
            The namespace of the graph.

        query : str
            The query to run.

        include_triples : bool
            Forwarded to the API as ``include_triples``.

        include_chunks : bool
            Forwarded to the API as ``include_chunks``.

        Returns
        -------
        QueryGraphResponse
            The namespace, answer, triples, and chunks and Cypher query.

        """
        request_body = QueryGraphRequest(
            query=query,
            include_triples=include_triples,
            include_chunks=include_chunks,
        )

        raw_response = self.client.post(
            f"{self.prefix}/{namespace}/query",
            json=request_body.model_dump(),
        )

        raw_response.raise_for_status()

        return QueryGraphResponse.model_validate(raw_response.json())

    def query_graph_specific(
        self,
        namespace: str,
        query: str,
        entities: list[str] | None = None,
        relations: list[str] | None = None,
        include_triples: bool = False,
        include_chunks: bool = False,
    ) -> SpecificQueryGraphResponse:
        """Query the graph with specific entities and relations.

        Parameters
        ----------
        namespace : str
            The namespace of the graph.

        query : str
            The query to run.

        entities : list[str], optional
            The entities to query. Defaults to an empty list.

        relations : list[str], optional
            The relations to query. Defaults to an empty list.

        include_triples : bool
            Forwarded to the API as ``include_triples``.

        include_chunks : bool
            Forwarded to the API as ``include_chunks``.

        Returns
        -------
        SpecificQueryGraphResponse
            The namespace, answer, triples, and chunks.

        """
        # None instead of a shared mutable [] default; a fresh list is
        # created per call.
        request_body = SpecificQueryGraphRequest(
            query=query,
            entities=[] if entities is None else entities,
            relations=[] if relations is None else relations,
            include_triples=include_triples,
            include_chunks=include_chunks,
        )

        raw_response = self.client.post(
            f"{self.prefix}/{namespace}/specific_query",
            json=request_body.model_dump(),
        )

        raw_response.raise_for_status()

        return SpecificQueryGraphResponse.model_validate(raw_response.json())
class APIKeyAuth(Auth):
    """Authorization headers carrying the API key and backend credentials.

    Parameters
    ----------
    api_key : str
        Sent as ``x-api-key``.
    pinecone_api_key : str
        Sent as ``x-pinecone-key``.
    neo4j_url, neo4j_user, neo4j_password : str
        Sent as ``x-neo4j-url``, ``x-neo4j-user`` and ``x-neo4j-password``.
    model_type : str
        Sent as ``x-model-type``.
    openai_api_key : str, optional
        Sent as ``x-openai-key`` when set.
    azure_openai_api_key : str, optional
        Sent as ``x-azure-openai-key`` when set and no OpenAI key is given.
    azure_openai_version, azure_openai_endpoint : str, optional
        Sent as ``x-azure-openai-version`` / ``x-azure-openai-endpoint``.
    azure_openai_model_name, azure_openai_embedding_name : str, optional
        Sent as ``x-azure-openai-model-name`` /
        ``x-azure-openai-embedding-name``.
    use_azure : bool, optional
        Sent as ``x-use-azure`` with the value ``"True"`` or ``"False"``.

    Raises
    ------
    ValueError
        If both ``openai_api_key`` and ``azure_openai_api_key`` are set.
    """

    def __init__(
        self,
        api_key: str,
        pinecone_api_key: str,
        neo4j_url: str,
        neo4j_user: str,
        neo4j_password: str,
        model_type: str,
        openai_api_key: Optional[str] = None,
        azure_openai_api_key: Optional[str] = None,
        azure_openai_version: Optional[str] = None,
        azure_openai_endpoint: Optional[str] = None,
        azure_openai_model_name: Optional[str] = None,
        azure_openai_embedding_name: Optional[str] = None,
        use_azure: Optional[bool] = False,
    ) -> None:
        """Initialize the auth object."""
        if openai_api_key and azure_openai_api_key:
            raise ValueError(
                "Only one of openai_api_key or "
                "azure_openai_api_key should be set."
            )

        self.api_key = api_key
        self.pinecone_api_key = pinecone_api_key
        self.neo4j_url = neo4j_url
        self.neo4j_user = neo4j_user
        self.neo4j_password = neo4j_password
        self.model_type = model_type
        self.openai_api_key = openai_api_key
        self.azure_openai_api_key = azure_openai_api_key
        self.azure_openai_version = azure_openai_version
        self.azure_openai_endpoint = azure_openai_endpoint
        self.azure_openai_model_name = azure_openai_model_name
        self.azure_openai_embedding_name = azure_openai_embedding_name
        self.use_azure = use_azure

    def auth_flow(
        self, request: Request
    ) -> Generator[Request, Response, None]:
        """Add the API key and credential headers to the request."""
        headers = request.headers

        headers["x-api-key"] = self.api_key
        headers["x-pinecone-key"] = self.pinecone_api_key

        # The OpenAI key takes precedence; the Azure key is only sent when
        # no OpenAI key is configured.
        if self.openai_api_key is not None:
            headers["x-openai-key"] = self.openai_api_key
        elif self.azure_openai_api_key is not None:
            headers["x-azure-openai-key"] = self.azure_openai_api_key

        # The remaining Azure settings are sent whenever they are set.
        optional_azure_headers = (
            ("x-azure-openai-version", self.azure_openai_version),
            ("x-azure-openai-endpoint", self.azure_openai_endpoint),
            ("x-azure-openai-model-name", self.azure_openai_model_name),
            (
                "x-azure-openai-embedding-name",
                self.azure_openai_embedding_name,
            ),
        )
        for header_name, value in optional_azure_headers:
            if value is not None:
                headers[header_name] = value

        headers["x-neo4j-user"] = self.neo4j_user
        headers["x-neo4j-password"] = self.neo4j_password
        headers["x-neo4j-url"] = self.neo4j_url
        headers["x-model-type"] = self.model_type

        # Any falsy value (False or None) is reported as "False".
        headers["x-use-azure"] = "True" if self.use_azure else "False"

        yield request
class WhyHow:
    """Synchronous client for the WhyHow API.

    Every credential that is not passed explicitly is read from the
    environment variable named in the parameter description.

    Parameters
    ----------
    api_key : str, optional
        The API key to use for authentication. If not provided, the
        WHYHOW_API_KEY environment variable will be used.

    pinecone_api_key : str, optional
        Falls back to PINECONE_API_KEY.

    neo4j_url : str, optional
        Falls back to NEO4J_URL, then to NEO4J_URI.

    neo4j_user : str, optional
        Falls back to NEO4J_USER.

    neo4j_password : str, optional
        Falls back to NEO4J_PASSWORD.

    model_type : str, optional
        ``"general"`` or ``"health"``. Falls back to MODEL_TYPE, then to
        ``"general"``. Any other value is replaced by ``"general"``.

    openai_api_key : str, optional
        Falls back to OPENAI_API_KEY.

    azure_openai_api_key : str, optional
        Falls back to AZURE_OPENAI_API_KEY. When an Azure key is in use, the
        version, endpoint, model name and embedding name are required too.

    azure_openai_version : str, optional
        Falls back to AZURE_OPENAI_API_VERSION.

    azure_openai_endpoint : str, optional
        Falls back to AZURE_OPENAI_ENDPOINT.

    azure_openai_model_name : str, optional
        Falls back to AZURE_OPENAI_MODEL_NAME.

    azure_openai_embedding_name : str, optional
        Falls back to AZURE_OPENAI_EMBEDDING_NAME.

    base_url : str, optional
        The base URL for the API.

    use_azure : bool, optional
        Forwarded to the API in the ``x-use-azure`` header.

    httpx_kwargs : dict, optional
        Additional keyword arguments to pass to the httpx client. A
        ``timeout`` of 60 seconds is used unless one is given here.

    Attributes
    ----------
    httpx_client : httpx.Client
        A synchronous httpx client.

    graph : GraphAPI
        Access to the graph endpoints, mounted under ``/graphs``.

    Raises
    ------
    ValueError
        If a required credential is neither passed nor present in the
        environment, or ``base_url`` is given inside ``httpx_kwargs``.
    """

    @staticmethod
    def _env_default(value: str | None, env_name: str) -> str | None:
        """Return value, or the environment variable when value is None."""
        if value is not None:
            return value
        return os.environ.get(env_name)

    def __init__(
        self,
        api_key: str | None = None,
        pinecone_api_key: str | None = None,
        neo4j_url: str | None = None,
        neo4j_user: str | None = None,
        neo4j_password: str | None = None,
        model_type: str | None = None,
        openai_api_key: str | None = None,
        azure_openai_api_key: str | None = None,
        azure_openai_version: str | None = None,
        azure_openai_endpoint: str | None = None,
        azure_openai_model_name: str | None = None,
        azure_openai_embedding_name: str | None = None,
        base_url: str = (
            "https://43nq5c1b4c.execute-api.us-east-2.amazonaws.com"
        ),
        use_azure: Optional[bool] = False,
        httpx_kwargs: dict[str, Any] | None = None,
    ) -> None:
        """Initialize the client."""
        # Work on a copy so the caller's dict is never modified.
        httpx_kwargs = dict(httpx_kwargs) if httpx_kwargs else {}

        api_key = self._env_default(api_key, "WHYHOW_API_KEY")
        if api_key is None:
            raise ValueError("WHYHOW_API_KEY must be set.")

        pinecone_api_key = self._env_default(
            pinecone_api_key, "PINECONE_API_KEY"
        )
        if pinecone_api_key is None:
            raise ValueError("PINECONE_API_KEY must be set.")

        model_type = self._env_default(model_type, "MODEL_TYPE")
        if model_type is None:
            model_type = "general"
        elif model_type not in ["general", "health"]:
            print("Invalid model type. Using general model.")
            model_type = "general"

        openai_api_key = self._env_default(openai_api_key, "OPENAI_API_KEY")
        azure_openai_api_key = self._env_default(
            azure_openai_api_key, "AZURE_OPENAI_API_KEY"
        )
        if openai_api_key is None and azure_openai_api_key is None:
            raise ValueError(
                "At least one of OPENAI_API_KEY "
                "or AZURE_OPENAI_API_KEY must be set."
            )

        # The Azure settings below are only mandatory with an Azure key.
        azure_in_use = azure_openai_api_key is not None

        azure_openai_version = self._env_default(
            azure_openai_version, "AZURE_OPENAI_API_VERSION"
        )
        if azure_in_use and azure_openai_version is None:
            raise ValueError("AZURE_OPENAI_API_VERSION must be set.")

        azure_openai_endpoint = self._env_default(
            azure_openai_endpoint, "AZURE_OPENAI_ENDPOINT"
        )
        if azure_in_use and azure_openai_endpoint is None:
            raise ValueError("AZURE_OPENAI_ENDPOINT must be set.")

        azure_openai_model_name = self._env_default(
            azure_openai_model_name, "AZURE_OPENAI_MODEL_NAME"
        )
        if azure_in_use and azure_openai_model_name is None:
            raise ValueError("AZURE_OPENAI_MODEL_NAME must be set.")

        azure_openai_embedding_name = self._env_default(
            azure_openai_embedding_name, "AZURE_OPENAI_EMBEDDING_NAME"
        )
        if azure_in_use and azure_openai_embedding_name is None:
            raise ValueError("AZURE_OPENAI_EMBEDDING_NAME must be set.")

        neo4j_user = self._env_default(neo4j_user, "NEO4J_USER")
        if neo4j_user is None:
            raise ValueError("NEO4J_USER must be set.")

        neo4j_password = self._env_default(neo4j_password, "NEO4J_PASSWORD")
        if neo4j_password is None:
            raise ValueError("NEO4J_PASSWORD must be set.")

        # NEO4J_URL stays the primary name; NEO4J_URI is accepted as well
        # because that is the name used in the project's .env.sample.
        neo4j_url = self._env_default(neo4j_url, "NEO4J_URL")
        neo4j_url = self._env_default(neo4j_url, "NEO4J_URI")
        if neo4j_url is None:
            raise ValueError("NEO4J_URL must be set.")

        auth = APIKeyAuth(
            api_key=api_key,
            pinecone_api_key=pinecone_api_key,
            neo4j_url=neo4j_url,
            neo4j_user=neo4j_user,
            neo4j_password=neo4j_password,
            model_type=model_type,
            openai_api_key=openai_api_key,
            azure_openai_api_key=azure_openai_api_key,
            azure_openai_version=azure_openai_version,
            azure_openai_endpoint=azure_openai_endpoint,
            azure_openai_model_name=azure_openai_model_name,
            azure_openai_embedding_name=azure_openai_embedding_name,
            use_azure=use_azure,
        )

        if "base_url" in httpx_kwargs:
            raise ValueError("base_url cannot be set in httpx_kwargs.")

        # Default to a 60 second timeout, but respect an explicit choice.
        httpx_kwargs.setdefault("timeout", 60.0)

        self.httpx_client = Client(
            base_url=base_url,
            auth=auth,
            **httpx_kwargs,
        )

        self.graph = GraphAPI(client=self.httpx_client, prefix="/graphs")
class AsyncWhyHow:
    """Asynchronous client for the WhyHow API.

    Every credential that is not passed explicitly is read from the
    environment variable named in the parameter description.

    Parameters
    ----------
    api_key : str, optional
        The API key to use for authentication. If not provided, the
        WHYHOW_API_KEY environment variable will be used.

    pinecone_api_key : str, optional
        Falls back to PINECONE_API_KEY.

    openai_api_key : str, optional
        Falls back to OPENAI_API_KEY.

    neo4j_user : str, optional
        Falls back to NEO4J_USER.

    neo4j_password : str, optional
        Falls back to NEO4J_PASSWORD.

    neo4j_url : str, optional
        Falls back to NEO4J_URL, then to NEO4J_URI.

    model_type : str, optional
        ``"general"`` or ``"health"``. Falls back to MODEL_TYPE, then to
        ``"general"``. Any other value is replaced by ``"general"``.

    base_url : str, optional
        The base URL for the API.

    httpx_kwargs : dict, optional
        Additional keyword arguments to pass to the httpx async client.

    Attributes
    ----------
    httpx_client : httpx.AsyncClient
        An async httpx client.

    Raises
    ------
    ValueError
        If a required credential is neither passed nor present in the
        environment, or ``base_url`` is given inside ``httpx_kwargs``.
    """

    def __init__(
        self,
        api_key: str | None = None,
        pinecone_api_key: str | None = None,
        openai_api_key: str | None = None,
        neo4j_user: str | None = None,
        neo4j_password: str | None = None,
        neo4j_url: str | None = None,
        model_type: str | None = None,
        base_url: str = (
            "https://43nq5c1b4c.execute-api.us-east-2.amazonaws.com"
        ),
        httpx_kwargs: dict[str, Any] | None = None,
    ) -> None:
        """Initialize the client."""
        if httpx_kwargs is None:
            httpx_kwargs = {}

        if api_key is None:
            api_key = os.environ.get("WHYHOW_API_KEY")
        if api_key is None:
            raise ValueError("WHYHOW_API_KEY must be set.")

        if pinecone_api_key is None:
            pinecone_api_key = os.environ.get("PINECONE_API_KEY")
        if pinecone_api_key is None:
            raise ValueError("PINECONE_API_KEY must be set.")

        if model_type is None:
            model_type = os.environ.get("MODEL_TYPE")
        if model_type is None:
            model_type = "general"
        elif model_type not in ["general", "health"]:
            print("Invalid model type. Using general model.")
            model_type = "general"

        if openai_api_key is None:
            openai_api_key = os.environ.get("OPENAI_API_KEY")
        if openai_api_key is None:
            raise ValueError("OPENAI_API_KEY must be set.")

        if neo4j_user is None:
            neo4j_user = os.environ.get("NEO4J_USER")
        if neo4j_user is None:
            raise ValueError("NEO4J_USER must be set.")

        if neo4j_password is None:
            neo4j_password = os.environ.get("NEO4J_PASSWORD")
        if neo4j_password is None:
            raise ValueError("NEO4J_PASSWORD must be set.")

        # NEO4J_URL stays the primary name; NEO4J_URI is accepted as well
        # because that is the name used in the project's .env.sample.
        if neo4j_url is None:
            neo4j_url = os.environ.get("NEO4J_URL")
        if neo4j_url is None:
            neo4j_url = os.environ.get("NEO4J_URI")
        if neo4j_url is None:
            raise ValueError("NEO4J_URL must be set.")

        # Keyword arguments on purpose: this constructor takes its
        # parameters in a different order than APIKeyAuth does, and passing
        # them positionally sent the credentials in the wrong headers.
        auth = APIKeyAuth(
            api_key=api_key,
            pinecone_api_key=pinecone_api_key,
            neo4j_url=neo4j_url,
            neo4j_user=neo4j_user,
            neo4j_password=neo4j_password,
            model_type=model_type,
            openai_api_key=openai_api_key,
        )

        if "base_url" in httpx_kwargs:
            raise ValueError("base_url cannot be set in httpx_kwargs.")

        self.httpx_client = AsyncClient(
            base_url=base_url,
            auth=auth,
            **httpx_kwargs,
        )
-------------------------------------------------------------------------------- 1 | """Collection of various schemas and types.""" 2 | 3 | from whyhow.schemas.common import Entity, Graph, Node, Relationship, Triple 4 | 5 | __all__ = ["Entity", "Graph", "Node", "Relationship", "Triple"] 6 | -------------------------------------------------------------------------------- /src/whyhow/schemas/base.py: -------------------------------------------------------------------------------- 1 | """Base classes for request, response, and return schemas.""" 2 | 3 | from abc import ABC 4 | 5 | from pydantic import BaseModel, ConfigDict 6 | 7 | 8 | class BaseRequest(BaseModel, ABC): 9 | """Base class for all request schemas.""" 10 | 11 | model_config = ConfigDict(extra="forbid") 12 | 13 | 14 | class BaseResponse(BaseModel, ABC): 15 | """Base class for all response schemas. 16 | 17 | Since the API can change, we want to ignore any extra fields that are not 18 | defined in the schema. 19 | """ 20 | 21 | model_config = ConfigDict(extra="ignore") 22 | 23 | 24 | class BaseReturn(BaseModel, ABC): 25 | """Base class for return schemas.""" 26 | 27 | model_config = ConfigDict(extra="forbid") 28 | -------------------------------------------------------------------------------- /src/whyhow/schemas/common.py: -------------------------------------------------------------------------------- 1 | """Shared schemas.""" 2 | 3 | from typing import Any, List, Optional 4 | 5 | from pydantic import BaseModel, Field, model_validator 6 | 7 | 8 | class Node(BaseModel): 9 | """Schema for a single node. 10 | 11 | Mirroring Neo4j"s node structure. 12 | """ 13 | 14 | labels: list[str] 15 | properties: dict[str, Any] = Field(default_factory=dict) 16 | 17 | 18 | class Relationship(BaseModel): 19 | """Schema for a single relationship. 20 | 21 | Mirroring Neo4j"s relationship structure. 
class Graph(BaseModel):
    """Schema for a graph.

    Mirroring Neo4j's graph structure.

    When ``nodes`` is missing or ``None``, the node list is derived from the
    start and end nodes of the given relationships, in order of first
    appearance and without duplicates.
    """

    relationships: list[Relationship]
    nodes: list[Node]

    @model_validator(mode="before")
    @classmethod
    def imply_nodes(cls, data: Any) -> Any:
        """Imply nodes from relationships if not provided.

        Runs before field validation, so the relationships may still be
        plain dicts (e.g. parsed JSON) as well as ``Relationship`` objects;
        both forms are handled.
        """
        # Anything that is not a mapping is left for pydantic to validate.
        if not isinstance(data, dict):
            return data

        if data.get("nodes") is not None:
            return data

        nodes: list[Any] = []
        for rel in data.get("relationships") or []:
            if isinstance(rel, dict):
                endpoints = (rel.get("start_node"), rel.get("end_node"))
            else:
                endpoints = (
                    getattr(rel, "start_node", None),
                    getattr(rel, "end_node", None),
                )

            # A missing endpoint is skipped here; field validation of the
            # relationship reports it afterwards.
            for endpoint in endpoints:
                if endpoint is not None and endpoint not in nodes:
                    nodes.append(endpoint)

        # Return a new mapping rather than modifying the caller's input.
        return {**data, "nodes": nodes}
94 | 95 | Note that this is not identical to RelationshipNeo4j because 96 | it is not using ids 97 | 98 | """ 99 | 100 | head: str 101 | head_type: str 102 | relationship: str 103 | tail: str 104 | tail_type: str 105 | properties: dict[str, Any] = Field(default_factory=dict) 106 | 107 | def to_relationship(self) -> Relationship: 108 | """Convert the triple to a relationship.""" 109 | start = Node(labels=[self.head_type], properties={"name": self.head}) 110 | end = Node(labels=[self.tail_type], properties={"name": self.tail}) 111 | 112 | return Relationship( 113 | type=self.relationship, 114 | start_node=start, 115 | end_node=end, 116 | properties=self.properties, 117 | ) 118 | 119 | @classmethod 120 | def from_relationship(cls, relationship: Relationship) -> "Triple": 121 | """Convert a relationship to a triple.""" 122 | start = relationship.start_node 123 | end = relationship.end_node 124 | 125 | if "name" not in start.properties: 126 | raise ValueError("Start node must have a name property.") 127 | if "name" not in end.properties: 128 | raise ValueError("End node must have a name property.") 129 | 130 | return cls( 131 | head=start.properties["name"], 132 | head_type=start.labels[0], # take the first label 133 | relationship=relationship.type, 134 | tail=end.properties["name"], 135 | tail_type=end.labels[0], # take the first label 136 | properties=relationship.properties, 137 | ) 138 | 139 | 140 | # GRAPH SCHEMA 141 | class SchemaEntity(BaseModel): 142 | """Schema Entity model.""" 143 | 144 | name: str 145 | property_columns: Optional[List[str]] = None 146 | set_type_as: Optional[str] = None 147 | description: str 148 | 149 | 150 | class SchemaRelation(BaseModel): 151 | """Schema Relation model.""" 152 | 153 | name: str 154 | description: str 155 | 156 | 157 | class TriplePattern(BaseModel): 158 | """Schema Triple Pattern model.""" 159 | 160 | head: str 161 | relation: str 162 | tail: str 163 | description: str 164 | 165 | 166 | class Schema(BaseModel): 167 | 
"""Schema model.""" 168 | 169 | entities: List[SchemaEntity] = Field(default_factory=list) 170 | relations: List[SchemaRelation] = Field(default_factory=list) 171 | patterns: List[TriplePattern] = Field(default_factory=list) 172 | 173 | def get_entity(self, name: str) -> Optional[SchemaEntity]: 174 | """Return an entity by name if it exists in the schema.""" 175 | for entity in self.entities: 176 | if entity.name == name: 177 | return entity 178 | return None # Return None if no entity with that name is found 179 | 180 | def get_relation(self, name: str) -> Optional[SchemaRelation]: 181 | """Return a relation by name if it exists in the schema.""" 182 | for relation in self.relations: 183 | if relation.name == name: 184 | return relation 185 | return None # Return None if no relation with that name is found 186 | -------------------------------------------------------------------------------- /src/whyhow/schemas/graph.py: -------------------------------------------------------------------------------- 1 | """Collection of schemas for the API.""" 2 | 3 | from typing import Literal 4 | 5 | from whyhow.schemas.base import BaseRequest, BaseResponse, BaseReturn 6 | from whyhow.schemas.common import Graph, Schema 7 | 8 | # Custom types 9 | Status = Literal["success", "pending", "failure"] 10 | 11 | 12 | class AddDocumentsResponse(BaseResponse): 13 | """Schema for the response body of the add documents endpoint.""" 14 | 15 | namespace: str 16 | message: str 17 | 18 | 19 | class CreateQuestionGraphRequest(BaseRequest): 20 | """Schema for the request body of the create graph endpoint.""" 21 | 22 | questions: list[str] 23 | 24 | 25 | class CreateSchemaGraphRequest(BaseRequest): 26 | """Schema for the request body of the create graph with schema endpoint.""" 27 | 28 | graph_schema: Schema 29 | 30 | 31 | # Request and response schemas 32 | class CreateGraphResponse(BaseResponse): 33 | """Schema for the response body of the create graph endpoint.""" 34 | 35 | namespace: str 36 
| message: str 37 | 38 | 39 | class GetGraphResponse(BaseResponse): 40 | """Schema for the response body of the get graph endpoint.""" 41 | 42 | namespace: str 43 | status: Status 44 | documents: list[str] 45 | graph: Graph 46 | 47 | 48 | class QueryGraphRequest(BaseRequest): 49 | """Schema for the request body of the query graph endpoint.""" 50 | 51 | query: str 52 | include_triples: bool = False 53 | include_chunks: bool = False 54 | 55 | 56 | class QueryGraphTripleResponse(BaseResponse): 57 | """Schema for the triples within the query graph response.""" 58 | 59 | head: str 60 | relation: str 61 | tail: str 62 | 63 | 64 | class QueryGraphChunkResponse(BaseResponse): 65 | """Schema for the triples within the query graph response.""" 66 | 67 | head: str 68 | relation: str 69 | tail: str 70 | chunk_ids: list[str] 71 | chunk_texts: list[str] 72 | 73 | 74 | class QueryGraphResponse(BaseResponse): 75 | """Schema for the response body of the query graph endpoint.""" 76 | 77 | namespace: str 78 | answer: str 79 | triples: list[QueryGraphTripleResponse] = [] 80 | chunks: list[QueryGraphChunkResponse] = [] 81 | 82 | 83 | class QueryGraphReturn(BaseReturn): 84 | """Schema for the return value of the query graph endpoint.""" 85 | 86 | namespace: str 87 | answer: str 88 | triples: list[QueryGraphTripleResponse] = [] 89 | chunks: list[QueryGraphChunkResponse] = [] 90 | 91 | 92 | class SpecificQueryGraphRequest(BaseRequest): 93 | """Schema for the request body of the specific query graph endpoint.""" 94 | 95 | query: str 96 | entities: list[str] = [] 97 | relations: list[str] = [] 98 | include_triples: bool = False 99 | include_chunks: bool = False 100 | 101 | 102 | class SpecificQueryGraphResponse(BaseResponse): 103 | """Schema for the response body of the query graph endpoint.""" 104 | 105 | namespace: str 106 | answer: str 107 | triples: list[dict[str, str]] = [] 108 | -------------------------------------------------------------------------------- /tests/apis/test_graph.py: 
# --------------------------------------------------------------------------------
"""Tests focused on the graph API."""

import os

import pytest

from whyhow.client import WhyHow
from whyhow.schemas.common import Graph, Node, Relationship
from whyhow.schemas.graph import QueryGraphRequest, QueryGraphResponse

# Set fake environment variables
# NOTE(review): .env.sample names the Neo4j address variable NEO4J_URI, while
# NEO4J_URL is set here -- confirm which one the client actually reads.
os.environ.update(
    {
        "WHYHOW_API_KEY": "fake_api_key",
        "OPENAI_API_KEY": "fake_openai_key",
        "PINECONE_API_KEY": "fake_pinecone_key",
        "NEO4J_USER": "fake_neo4j_user",
        "NEO4J_PASSWORD": "fake_neo4j_password",
        "NEO4J_URL": "fake_neo4j_url",
    }
)

_ALICE = Node(labels=["Person"], properties={"name": "Alice"})
_BOB = Node(labels=["Person"], properties={"name": "Bob"})

# Minimal one-relationship graph: Alice -[knows]-> Bob.
EXAMPLE_GRAPH = Graph(
    relationships=[
        Relationship(
            type="knows",
            start_node=_ALICE,
            end_node=_BOB,
            properties={"since": "2022-01-01"},
        )
    ]
)


class TestGraphAPIQuery:
    """Tests for the `query` method."""

    def test_query_graph(self, httpx_mock):
        """Check the outgoing request and the parsed result of a query."""
        client = WhyHow(
            openai_api_key="fake_openai_key",
            azure_openai_api_key=None,
            azure_openai_version=None,
        )
        question = "What friends does Alice have?"

        # The same payload is served by the mocked endpoint and expected
        # back from the client.
        canned_response = QueryGraphResponse(
            namespace="something",
            answer="Alice knows Bob",
            triples=[],
            chunks=[],
        )
        httpx_mock.add_response(
            method="POST",
            json=canned_response.model_dump(),
        )

        result = client.graph.query_graph(
            namespace="something",
            query=question,
        )

        assert result == canned_response

        sent_request = httpx_mock.get_requests()[0]
        sent_body = QueryGraphRequest.model_validate_json(
            sent_request.read().decode()
        )

        assert sent_request.url.path == "/graphs/something/query"
        assert sent_body == QueryGraphRequest(query=question)


class TestGraphAPIAddDocuments:
    """Tests for the `add_documents` method."""

    def test_errors(self, httpx_mock, tmp_path):
        """Check each input validation error of ``add_documents`` in turn."""
        client = WhyHow(
            openai_api_key="fake_openai_key",
            azure_openai_api_key=None,
            azure_openai_version=None,
        )

        # 1. Empty document list.
        with pytest.raises(ValueError, match="No documents provided"):
            client.graph.add_documents("something", documents=[])

        existing_pdf = tmp_path / "example.pdf"
        existing_pdf.touch()
        wav_file = tmp_path / "example2.wav"  # not created yet
        documents = [existing_pdf, wav_file]

        # 2. One of the paths does not exist on disk.
        with pytest.raises(ValueError, match="Not all documents exist"):
            client.graph.add_documents("something", documents=documents)

        wav_file.touch()

        # 3. Every path exists, but one has an unsupported extension.
        with pytest.raises(
            ValueError, match="Only PDFs and CSVs are supported"
        ):
            client.graph.add_documents("something", documents=documents)


# --------------------------------------------------------------------------------
# /tests/conftest.py:
# --------------------------------------------------------------------------------
"""Configuration for the tests."""

import pathlib

import pytest


@pytest.fixture
def test_path():
    """Return the directory that contains this conftest file."""
    this_file = pathlib.Path(__file__)
    return this_file.parent


@pytest.fixture(autouse=True)
def delete_env_vars(monkeypatch):
    """Set a placeholder ``WHYHOW_API_KEY`` for every test.

    Despite its name, this fixture does not delete anything: it sets
    ``WHYHOW_API_KEY`` to ``"FAKE"`` through ``monkeypatch``.
    """
    monkeypatch.setenv("WHYHOW_API_KEY", "FAKE")


# --------------------------------------------------------------------------------
# /tests/schemas/test_common.py:
# --------------------------------------------------------------------------------
"""Tests for whyhow.schemas.common."""

import pytest

from whyhow.schemas.common import Entity, Graph, Node, Relationship, Triple


class TestGraph:
    """Tests for the Graph class."""

    def test_no_nodes(self):
        """An empty relationship list yields an empty node list."""
        empty = Graph(relationships=[])

        assert empty.nodes == []
        assert empty.relationships == []

    def test_1_node(self):
        """Explicitly passed nodes are kept as they are."""
        alice = Node(labels=["Person"], properties={"name": "Alice"})
        graph = Graph(nodes=[alice], relationships=[])

        assert graph.nodes == [alice]
        assert graph.relationships == []

    def test_3_nodes_1_rel(self):
        """Explicit nodes win; otherwise nodes come from relationships."""
        alice, bob, charlie = (
            Node(labels=["Person"], properties={"name": name})
            for name in ("Alice", "Bob", "Charlie")
        )

        knows = Relationship(
            start_node=alice,
            end_node=bob,
            type="KNOWS",
            properties={"since": 1999},
        )

        explicit = Graph(nodes=[alice, bob, charlie], relationships=[knows])
        assert explicit.nodes == [alice, bob, charlie]
        assert explicit.relationships == [knows]

        # Without explicit nodes only the endpoints of the relationship
        # are present (Charlie is not connected to anything).
        implied = Graph(relationships=[knows])

        assert implied.nodes == [alice, bob]
        assert implied.relationships == [knows]


class TestEntity:
    """Tests for the Entity class."""

    def test_overall(self):
        """Round-trip an entity through ``to_node`` and ``from_node``."""
        original = Entity(
            text="Alice", label="Person", properties={"foo": "bar"}
        )
        node = original.to_node()

        assert node.labels == ["Person"]
        assert node.properties == {"name": "Alice", "foo": "bar"}

        restored = Entity.from_node(node)

        assert original.text == restored.text
        assert original.label == restored.label
        assert original.properties == restored.properties

        # test properties copied
        assert original.properties is not restored.properties

    def test_missing_name(self):
        """A node without a name property cannot become an entity."""
        nameless = Node(labels=["Person"], properties={})

        with pytest.raises(ValueError, match="Node must have a name property"):
            Entity.from_node(nameless)


class TestTriple:
    """Tests for the Triple class."""

    # TODO(review): this round-trip test is disabled and the reason is not
    # recorded here -- re-enable it or delete it.
    # def test_overall(self):
    #     triple = Triple(
    #         head="Alice",
    #         head_type="Person",
    #         relationship="KNOWS",
    #         tail="Bob",
    #         tail_type="Person",
    #         properties={"since": 1999},
    #     )

    #     assert triple.head == "Alice"
    #     assert triple.head_type == "Person"
    #     assert triple.relationship == "KNOWS"
    #     assert triple.tail == "Bob"
    #     assert triple.tail_type == "Person"

    #     rel = triple.to_relationship()

    #     assert rel.start_node.labels == ["Person"]
    #     assert rel.start_node.properties == {"name": "Alice"}
    #     assert rel.end_node.labels == ["Person"]
    #     assert rel.end_node.properties == {"name": "Bob"}
    #     assert rel.type == "KNOWS"
    #     assert rel.properties == {"since": 1999}

    #     triple_reconstructed = Triple.from_relationship(rel)

    #     assert triple.head == triple_reconstructed.head
    #     assert triple.head_type == triple_reconstructed.head_type
    #     assert triple.relationship == triple_reconstructed.relationship
    #     assert triple.tail == triple_reconstructed.tail
    #     assert triple.tail_type == triple_reconstructed.tail_type
    #     assert triple.properties == triple_reconstructed.properties

    #     # test properties copied
    #     assert triple.properties is not triple_reconstructed.properties

    def test_missing_name(self):
        """Both endpoints need a name property to build a triple."""
        # Start node lacks the name.
        missing_start = Relationship(
            start_node=Node(labels=["Person"], properties={}),
            end_node=Node(labels=["Person"], properties={"name": "Bob"}),
            type="KNOWS",
            properties={},
        )

        with pytest.raises(
            ValueError, match="Start node must have a name property"
        ):
            Triple.from_relationship(missing_start)

        # End node lacks the name.
        missing_end = Relationship(
            start_node=Node(labels=["Person"], properties={"name": "Alice"}),
            end_node=Node(labels=["Person"], properties={}),
            type="KNOWS",
            properties={},
        )

        with pytest.raises(
            ValueError, match="End node must have a name property"
        ):
            Triple.from_relationship(missing_end)


# --------------------------------------------------------------------------------
# /tests/test_client.py:
# --------------------------------------------------------------------------------
"""Tests for the client module."""

from unittest.mock import Mock

import pytest
from httpx import Client

from whyhow.client import WhyHow


class TestWhyHow:
    """Tests for the WhyHow class."""

    def test_constructor_missing_api_key(self, monkeypatch):
        """Test that an error is raised when the API key is missing."""
        monkeypatch.delenv("WHYHOW_API_KEY", raising=False)

        with pytest.raises(ValueError, match="WHYHOW_API_KEY must be set"):
            WhyHow()

    def test_httpx_kwargs(self, monkeypatch):
        """Test that httpx_kwargs are passed to the httpx client."""
        fake_instance = Mock(spec=Client)
        fake_class = Mock(return_value=fake_instance)

        # Swap the httpx client class used by the client module for the fake.
        monkeypatch.setattr("whyhow.client.Client", fake_class)
        client = WhyHow(
            api_key="key",
            httpx_kwargs={"verify": False},
        )

        assert fake_class.call_count == 1
        call = fake_class.call_args

        assert not call.args
        assert (
            call.kwargs["base_url"]
            == "https://43nq5c1b4c.execute-api.us-east-2.amazonaws.com"
        )
        assert not call.kwargs["verify"]

        assert client.httpx_client is fake_class.return_value

    def test_base_url_twice(self):
        """Test that an error is raised when base_url is set twice."""
        with pytest.raises(
            ValueError, match="base_url cannot be set in httpx_kwargs."
        ):
            WhyHow(
                api_key="key",
                httpx_kwargs={"base_url": "https://example.com"},
            )


# --------------------------------------------------------------------------------
# /tests/test_dummy.py:
# --------------------------------------------------------------------------------
"""Dummy test."""


def test():
    """Dummy test."""
    assert True


# --------------------------------------------------------------------------------