├── .env.sample
├── .github
    └── workflows
    │   └── main.yml
├── .gitignore
├── CHANGELOG.md
├── README.md
├── docs
    ├── api.md
    ├── index.md
    ├── installation.md
    └── tutorial.md
├── examples
    ├── assets
    │   ├── specialists.csv
    │   └── specialists.json
    ├── create_graph_from_csv.ipynb
    ├── create_graph_from_questions.ipynb
    └── create_graph_from_schema.ipynb
├── mkdocs.yml
├── pyproject.toml
├── src
    └── whyhow
    │   ├── __init__.py
    │   ├── apis
    │       ├── __init__.py
    │       ├── base.py
    │       └── graph.py
    │   ├── client.py
    │   ├── exceptions.py
    │   ├── py.typed
    │   └── schemas
    │       ├── __init__.py
    │       ├── base.py
    │       ├── common.py
    │       └── graph.py
└── tests
    ├── apis
        └── test_graph.py
    ├── conftest.py
    ├── schemas
        └── test_common.py
    ├── test_client.py
    └── test_dummy.py


/.env.sample:
--------------------------------------------------------------------------------
 1 | # .env.sample
 2 | # Copy this file as '.env' and fill in the values as described below.
 3 | WHYHOW_API_KEY=""
 4 | 
 5 | # OpenAI API Key: A string containing your OpenAI API key for accessing their services.
 6 | OPENAI_API_KEY=""
 7 | 
 8 | # Pinecone API Key: A string with your Pinecone API key.
 9 | PINECONE_API_KEY=""
10 | 
11 | # Neo4J URI: The URI for connecting to your Neo4J database instance.
12 | NEO4J_URI=""
13 | 
14 | # Neo4J User: The username for Neo4J database authentication.
15 | NEO4J_USER=""
16 | 
17 | # Neo4J Password: The password for Neo4J database authentication.
18 | NEO4J_PASSWORD=""


--------------------------------------------------------------------------------
/.github/workflows/main.yml:
--------------------------------------------------------------------------------
 1 | name: all
 2 | 
 3 | on:
 4 |   pull_request:
 5 |   push:
 6 |     branches: [main]
 7 | 
 8 | jobs:
 9 |   build:
10 | 
11 |     runs-on:  ${{ matrix.os }}
12 |     strategy:
13 |       matrix:
14 |         os: [ubuntu-latest]
15 |         python-version: ['3.10']
16 | 
17 |     steps:
18 |     - uses: actions/checkout@v4  # v2 runs on the retired Node 12 runtime
19 | 
20 |     - name: Set up Python ${{ matrix.python-version }}
21 |       uses: actions/setup-python@v5  # v2 targets the retired Node 12 runtime
22 |       with:
23 |         python-version: ${{ matrix.python-version }}
24 | 
25 |     - name: Install Python dependencies
26 |       run: |
27 |         python -m pip install --upgrade pip
28 |         pip install -e .[dev]
29 | 
30 |     - name: Lint with flake8
31 |       run: |
32 |         flake8 src tests examples
33 | 
34 |     - name: Check style with black  # --check fails the job instead of rewriting files
35 |       run: |
36 |         black --check src tests examples
37 | 
38 |     - name: Run security check
39 |       run: |
40 |         bandit -qr -c pyproject.toml src examples
41 | 
42 |     - name: Run import check
43 |       run: |
44 |         isort --check src tests examples
45 | 
46 |     - name: Run mypy
47 |       run: |
48 |         mypy src
49 | 
50 |     - name: Test with pytest
51 |       run: |
52 |         pytest --color=yes
53 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | .DS_Store
  3 | __pycache__/
  4 | *.py[cod]
  5 | *$py.class
  6 | 
  7 | # C extensions
  8 | *.so
  9 | 
 10 | # Distribution / packaging
 11 | .Python
 12 | build/
 13 | develop-eggs/
 14 | dist/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | wheels/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | cover/
 54 | 
 55 | # Translations
 56 | *.mo
 57 | *.pot
 58 | 
 59 | # Django stuff:
 60 | *.log
 61 | local_settings.py
 62 | db.sqlite3
 63 | db.sqlite3-journal
 64 | 
 65 | # Flask stuff:
 66 | instance/
 67 | .webassets-cache
 68 | 
 69 | # Scrapy stuff:
 70 | .scrapy
 71 | 
 72 | # Sphinx documentation
 73 | docs/_build/
 74 | 
 75 | # PyBuilder
 76 | .pybuilder/
 77 | target/
 78 | 
 79 | # Jupyter Notebook
 80 | .ipynb_checkpoints
 81 | 
 82 | # IPython
 83 | profile_default/
 84 | ipython_config.py
 85 | 
 86 | # pyenv
 87 | #   For a library or package, you might want to ignore these files since the code is
 88 | #   intended to run in multiple environments; otherwise, check them in:
 89 | # .python-version
 90 | 
 91 | # pipenv
 92 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 93 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 94 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 95 | #   install all needed dependencies.
 96 | #Pipfile.lock
 97 | 
 98 | # poetry
 99 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
100 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
101 | #   commonly ignored for libraries.
102 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
103 | #poetry.lock
104 | 
105 | # pdm
106 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
107 | #pdm.lock
108 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
109 | #   in version control.
110 | #   https://pdm.fming.dev/#use-with-ide
111 | .pdm.toml
112 | 
113 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
114 | __pypackages__/
115 | 
116 | # Celery stuff
117 | celerybeat-schedule
118 | celerybeat.pid
119 | 
120 | # SageMath parsed files
121 | *.sage.py
122 | 
123 | # Environments
124 | .env
125 | .venv
126 | env/
127 | venv/
128 | ENV/
129 | env.bak/
130 | venv.bak/
131 | 
132 | # Spyder project settings
133 | .spyderproject
134 | .spyproject
135 | 
136 | # Rope project settings
137 | .ropeproject
138 | 
139 | # mkdocs documentation
140 | /site
141 | 
142 | # mypy
143 | .mypy_cache/
144 | .dmypy.json
145 | dmypy.json
146 | 
147 | # Pyre type checker
148 | .pyre/
149 | 
150 | # pytype static type analyzer
151 | .pytype/
152 | 
153 | # Cython debug symbols
154 | cython_debug/
155 | 
156 | # PyCharm
157 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
158 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
159 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
160 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
161 | #.idea/
162 | 
163 | data/
164 | .python-version


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # Changelog
 2 | 
 3 | All notable changes to this project will be documented in this file.
 4 | 
 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 7 | 
 8 | ## [Unreleased]
 9 | 
10 | ### Added
11 | - Add all JSON only endpoints
12 | - Anticipate all types
13 | - Add schemas
14 | - Minimal package structure + CI
15 | 
16 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # (Deprecated) WhyHow Knowledge Graph Creation SDK
  2 | 
  3 | 
  4 | > [!CAUTION]
  5 | > This repo has been **deprecated** and is no longer supported. Instead please see the [Knowledge Graph Studio](https://github.com/whyhow-ai/knowledge-graph-studio).
  6 | > We are focusing on KG orchestration and management through our KG Studio. The Knowledge Graph Studio has been open-sourced here - https://github.com/whyhow-ai/knowledge-graph-studio.
  7 | 
  8 | 
  9 | [![Python Version](https://img.shields.io/badge/python-3.10%2B-blue)](https://www.python.org/downloads/)
 10 | [![License](https://img.shields.io/badge/license-MIT-green)](https://opensource.org/licenses/MIT)
 11 | [![PyPI Version](https://img.shields.io/pypi/v/whyhow)](https://pypi.org/project/whyhow/)
 12 | [![Code Style: Black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black)
 13 | [![Checked with mypy](https://img.shields.io/badge/mypy-checked-blue)](https://mypy-lang.org/)
 14 | [![Whyhow Discord](https://dcbadge.vercel.app/api/server/9bWqrsxgHr?compact=true&style=flat)](https://discord.gg/9bWqrsxgHr)
 15 | 
 16 | The WhyHow Knowledge Graph Creation SDK enables you to quickly and easily build automated knowledge graphs tailored to your unique worldview. Instantly build, extend, and query well-scoped KGs with your data.
 17 | 
 18 | 
 19 | # Installation
 20 | 
 21 | ## Prerequisites
 22 | 
 23 | - Python 3.10 or higher
 24 | - [OpenAI API key](https://openai.com/)
 25 | - [Pinecone API key](https://www.pinecone.io/)
 26 | - [Neo4j credentials](https://neo4j.com/cloud/platform/aura-graph-database/) (username, password, and URL)
 27 | 
 28 | ## Install from PyPI
 29 | 
 30 | You can install the SDK directly from PyPI using pip:
 31 | 
 32 | ```shell
 33 | pip install whyhow
 34 | 
 35 | # For OpenAI
 36 | export OPENAI_API_KEY=<your openai api key>
 37 | 
 38 | # For Azure OpenAI 
 39 | export AZURE_OPENAI_API_KEY=<your azure openai api key>
 40 | export AZURE_OPENAI_API_VERSION=<your azure openai api version>
 41 | export AZURE_OPENAI_ENDPOINT=<your azure openai api endpoint>
 42 | export AZURE_OPENAI_MODEL_NAME=<your azure model name>
 43 | export AZURE_OPENAI_EMBEDDING_NAME=<your azure embedding name>
 44 | 
 45 | export PINECONE_API_KEY=<your pinecone api key>
 46 | export NEO4J_URL=<your neo4j url>
 47 | export NEO4J_USER=<your neo4j username>
 48 | export NEO4J_PASSWORD=<your neo4j password>
 49 | ```
 50 | 
 51 | ## Install from GitHub
 52 | 
 53 | Alternatively, you can clone the repo and install the package
 54 | 
 55 | ```shell
 56 | 
 57 | git clone git@github.com:whyhow-ai/whyhow.git
 58 | cd whyhow
 59 | pip install .
 60 | 
 61 | # For OpenAI
 62 | export OPENAI_API_KEY=<your openai api key>
 63 | 
 64 | # For Azure OpenAI 
 65 | export AZURE_OPENAI_API_KEY=<your azure openai api key>
 66 | export AZURE_OPENAI_API_VERSION=<your azure openai api version>
 67 | export AZURE_OPENAI_ENDPOINT=<your azure openai api endpoint>
 68 | 
 69 | export PINECONE_API_KEY=<your pinecone api key>
 70 | export NEO4J_URL=<your neo4j url>
 71 | export NEO4J_USER=<your neo4j username>
 72 | export NEO4J_PASSWORD=<your neo4j password>
 73 | ```
 74 | 
 75 | # Examples
 76 | 
 77 | Navigate to the `examples/` directory for runnable notebooks.
 78 | 
 79 | # How to
 80 | 
 81 | ## Initialize SDK
 82 | 
 83 | Import the SDK and initialize the client using your WhyHow API key.
 84 | 
 85 | ```python
 86 | from whyhow import WhyHow
 87 | 
 88 | client = WhyHow(api_key=<your whyhow api key>)
 89 | ```
 90 | 
 91 | For Azure OpenAI:
 92 | 
 93 | ```python
 94 | from whyhow import WhyHow
 95 | 
 96 | client = WhyHow(api_key=<your whyhow api key>, use_azure=True)
 97 | ```
 98 | 
 99 | For an alternative model (for example, healthcare for text extraction): 
100 | 
101 | ```python
102 | from whyhow import WhyHow
103 | 
104 | client = WhyHow(api_key=<your whyhow api key>, model_type='health')
105 | ```
106 | 
107 | ## Add documents to namespace
108 | 
109 | Your namespace is a logical grouping of the raw data you upload, the seed concepts you define, and the graphs you create. Namespaces are meant to be tightly scoped to your use case. You can create as many namespaces as you want.
110 | 
111 | ```python
112 | 
113 | namespace = "harry-potter"
114 | documents = ["files/harry_potter_and_the_philosophers_stone.pdf","files/harry_potter_and_the_chamber_of_secrets.pdf"]
115 | 
116 | documents_response = client.graph.add_documents(namespace, documents)
117 | print(documents_response)
118 | # Adding your documents
119 | 
120 | ```
121 | 
122 | ## Create a graph
123 | 
124 | You can create a graph in three different ways. First, you can create a graph using a user-defined schema, giving you complete control over the types of entities and relationships that are extracted and used to build the graph. You can also create a graph using a set of seed questions. In this case, WhyHow will automatically extract entities and relationships that are most applicable to the things you want to know, and construct a graph from these concepts. Or, you can fully deterministically create a graph from structured context in the form of a CSV.
125 | 
126 | Create graph with **schema** if...
127 | 
128 | 1. Your graph must adhere to a consistent structure.
129 | 2. You are very familiar with the structure of your raw documents.
130 | 3. You need comprehensive extraction of concepts across the entire document.
131 | 
132 | Create graph with **seed questions** if...
133 | 
134 | 1. You are unsure as to which relationships and patterns you'd like to build into your graph.
135 | 2. You want to build your graph with only the most semantically similar raw data.
136 | 
137 | Create graph with **csv** if...
138 | 
139 | 1. You already know the structure of your data.
140 | 2. You already have data stored in a table format.
141 | 
142 | ### Create a graph with schema
143 | 
144 | Tell the WhyHow SDK exactly which entities, relationships, and patterns you'd like to extract and build into your graph by defining them in a JSON-based schema.
145 | 
146 | ```shell
147 | 
148 | #schema.json
149 | 
150 | {
151 |   "entities": [
152 |     {
153 |       "name": "character",
154 |       "description": "A person appearing in the book, e.g., Harry Potter, Ron Weasley, Hermione Granger, Albus Dumbledore."
155 |     },
156 |     {
157 |       "name": "object",
158 |       "description": "Inanimate items that characters use or interact with, e.g., wand, Philosopher's Stone, Invisibility Cloak, broomstick."
159 |     }
160 |     ...
161 |   ],
162 |   "relations": [
163 |     {
164 |       "name": "friends with",
165 |       "description": "Denotes a friendly relationship between characters."
166 |     },
167 |     {
168 |       "name": "interacts with",
169 |       "description": "Describes a scenario in which a character engages with another character, creature, or object."
170 |     },
171 |     ...
172 |   ],
173 |   "patterns": [
174 |     {
175 |       "head": "character",
176 |       "relation": "friends with",
177 |       "tail": "character",
178 |       "description": "One character is friends with another, e.g., Harry Potter is friends with Ron Weasley."
179 |     },
180 |     {
181 |       "head": "character",
182 |       "relation": "interacts with",
183 |       "tail": "object",
184 |       "description": "A character interacting with an object, e.g., Harry Potter interacts with the Invisibility Cloak."
185 |     }
186 |   ]
187 | }
188 | 
189 | ```
190 | 
191 | Using this schema, we extract relevant concepts from your raw data, construct triples, and generate a graph according to the patterns you define.
192 | 
193 | ```python
194 | # Create graph from schema
195 | 
196 | schema = "files/schema.json"
197 | create_graph_with_schema_response = client.graph.create_graph_from_schema(namespace, schema)
198 | print(create_graph_with_schema_response)
199 | # Creating your graph
200 | 
201 | ```
202 | 
203 | ### Create a graph with seed questions
204 | 
205 | Tell the WhyHow SDK what you care about by providing a list of concepts in the form of natural language questions. Using these questions, we create a small ontology to guide extraction of entities and relationships that are most relevant to your use case, then construct a graph.
206 | 
207 | ```python
208 | 
209 | questions = ["What does Harry wear?","Who is Harry friends with?"]
210 | 
211 | create_graph_response = client.graph.create_graph(namespace, questions)
212 | print(create_graph_response)
213 | # Creating your graph
214 | 
215 | ```
216 | 
217 | ### Create a graph with a csv
218 | 
219 | Provide a CSV and a schema (or automatically generate one using the `generate_schema` method) to create a graph. WhyHow will automatically extract entities and relationships from your CSV headers and data.
220 | 
221 | ```python
222 | 
223 | namespace = "specialists"
224 | documents = ["../examples/assets/specialists.csv"]
225 | schema_file = "../examples/assets/specialists.json"
226 | 
227 | # Automatically generate a schema
228 | schema = client.graph.generate_schema(documents=documents)
229 | 
230 | # Create a graph from a CSV and the schema you bring/build
231 | csv_graph = client.graph.create_graph_from_csv(
232 |     namespace=namespace, schema_file=schema_file
233 | )
234 | 
235 | # Query the graph created from csv using specific entities and relations
236 | query = "Who speaks English and live in Houston?"
237 | entities = ["English","Houston"]
238 | relations = ["SPEAKS","LIVE_IN"]
239 | 
240 | specific_query_response = client.graph.query_graph_specific(
241 |     namespace=namespace,
242 |     query=query,
243 |     entities=entities,
244 |     relations=relations,
245 |     include_triples=False,
246 |     include_chunks=False,
247 | )
248 | 
249 | ```
250 | 
251 | ### Support
252 | 
253 | WhyHow.AI is building tools to help developers bring more determinism and control to their RAG pipelines using graph structures. If you're thinking about, in the process of, or have already incorporated knowledge graphs in RAG, we’d love to chat at team@whyhow.ai, or follow our newsletter at [WhyHow.AI](https://www.whyhow.ai/). Join our discussions about rules, determinism and knowledge graphs in RAG on our [Discord](https://discord.com/invite/9bWqrsxgHr).
254 | 
255 | We appreciate your interest.
256 | 


--------------------------------------------------------------------------------
/docs/api.md:
--------------------------------------------------------------------------------
  4 | # API Reference
  5 | 
  6 | This document provides a reference for the WhyHow API, which allows you to interact with the graph functionality.
  7 | 
  8 | ## GraphAPI
  9 | 
 10 | The `GraphAPI` class provides methods to interact with the graph API synchronously.
 11 | 
 12 | ### `add_documents`
 13 | 
 14 | ```python
 15 | def add_documents(self, namespace: str, documents: list[str]) -> str
 16 | ```
 17 | 
 18 | Add documents to the graph.
 19 | 
 20 | #### Parameters
 21 | 
 22 | - `namespace` (str): The namespace of the graph.
 23 | - `documents` (list[str]): The documents to add.
 24 | 
 25 | #### Returns
 26 | 
 27 | - (str): The response message.
 28 | 
 29 | #### Raises
 30 | 
 31 | - `ValueError`: If no documents are provided, a document does not exist, a document is not a PDF, a PDF is too large (limit: 8MB), or too many documents are provided (limit: 3 files during the beta).
 32 | 
 33 | ### `generate_schema`
 34 | 
 35 | ```python
 36 | def generate_schema(self, documents: list[str]) -> str
 37 | ```
 38 | Generate a schema from a CSV document.
 39 | 
 40 | #### Parameters
 41 | 
 42 | - `documents` (list[str]): The CSV documents to generate the schema from.
 43 | 
 44 | #### Returns
 45 | 
 46 | - (str): The generated schema as a JSON string.
 47 | 
 48 | #### Raises
 49 | 
 50 | - `ValueError`: If no documents are provided, not all documents exist, or the documents are not in CSV format.
 51 | 
 52 | ### `create_graph`
 53 | 
 54 | ```python
 55 | def create_graph(self, namespace: str, questions: list[str]) -> str
 56 | ```
 57 | 
 58 | Create a new graph.
 59 | 
 60 | #### Parameters
 61 | 
 62 | - `namespace` (str): The namespace of the graph to create.
 63 | - `questions` (list[str]): The seed concepts to initialize the graph with.
 64 | 
 65 | #### Returns
 66 | 
 67 | - (str): The response message.
 68 | 
 69 | #### Raises
 70 | 
 71 | - `ValueError`: If no questions are provided.
 72 | 
 73 | ### `create_graph_from_schema`
 74 | 
 75 | ```python
 76 | def create_graph_from_schema(self, namespace: str, schema_file: str) -> str
 77 | ```
 78 | 
 79 | Create a new graph based on a user-defined schema.
 80 | 
 81 | #### Parameters
 82 | 
 83 | - `namespace` (str): The namespace of the graph to create.
 84 | - `schema_file` (str): The schema file to use to build the graph.
 85 | 
 86 | #### Returns
 87 | 
 88 | - (str): The response message.
 89 | 
 90 | #### Raises
 91 | 
 92 | - `ValueError`: If no schema is provided.
 93 | 
 94 | ### `create_graph_from_csv`
 95 | 
 96 | ```python
 97 | def create_graph_from_csv(self, namespace: str, schema_file: str) -> str
 98 | ```
 99 | 
100 | Create a new graph using a CSV based on a user-defined schema.
101 | 
102 | #### Parameters
103 | 
104 | - `namespace` (str): The namespace of the graph to create.
105 | - `schema_file` (str): The schema file to use to build the graph.
106 | 
107 | #### Returns
108 | 
109 | - (str): The response message.
110 | 
111 | #### Raises
112 | 
113 | - `ValueError`: If no schema is provided or if the schema contains invalid property column names.
114 | 
115 | ### `query_graph`
116 | 
117 | ```python
118 | def query_graph(self, namespace: str, query: str, include_triples: bool = False, include_chunks: bool = False) -> QueryGraphReturn
119 | ```
120 | 
121 | Query the graph.
122 | 
123 | #### Parameters
124 | 
125 | - `namespace` (str): The namespace of the graph.
126 | - `query` (str): The query to run.
127 | - `include_triples` (bool): Include the triples used in the return.
128 | - `include_chunks` (bool): Include the chunk ids and chunk text in the return.
129 | 
130 | #### Returns
131 | 
132 | - (`QueryGraphReturn`): The answer, triples (optional), and chunks (optional).
133 | 
134 | ### `query_graph_specific`
135 | 
136 | ```python
137 | def query_graph_specific(
138 |     self,
139 |     namespace: str,
140 |     query: str,
141 |     entities: list[str] = [],
142 |     relations: list[str] = [],
143 |     include_triples: bool = False,
144 |     include_chunks: bool = False,
145 | ) -> SpecificQueryGraphResponse
146 | ```
147 | 
148 | Query the graph with specific entities and relations.
149 | 
150 | #### Parameters
151 | 
152 | - `namespace` (str): The namespace of the graph.
153 | - `query` (str): The query to run.
154 | - `entities` (list[str]): The entities to query.
155 | - `relations` (list[str]): The relations to query.
156 | - `include_triples` (bool): Whether to include triples in the response.
157 | - `include_chunks` (bool): Whether to include chunks in the response.
158 | 
159 | #### Returns
160 | 
161 | - (SpecificQueryGraphResponse): The namespace, answer, triples, and chunks.
162 | 
163 | ## Schemas
164 | 
165 | The WhyHow API uses Pydantic models to define the request and response schemas.
166 | 
167 | ### `AddDocumentsResponse`
168 | 
169 | ```python
170 | class AddDocumentsResponse(BaseResponse):
171 |     """Schema for the response body of the add documents endpoint."""
172 | 
173 |     namespace: str
174 |     message: str
175 | ```
176 | 
177 | ### `CreateQuestionGraphRequest`
178 | 
179 | ```python
180 | class CreateQuestionGraphRequest(BaseRequest):
181 |     """Schema for the request body of the create graph endpoint."""
182 | 
183 |     questions: list[str]
184 | ```
185 | 
186 | ### `CreateSchemaGraphRequest`
187 | 
188 | ```python
189 | class CreateSchemaGraphRequest(BaseRequest):
190 |     """Schema for the request body of the create graph endpoint."""
191 | 
192 |     graph_schema: SchemaModel
193 | ```
194 | 
195 | ### `CreateGraphResponse`
196 | 
197 | ```python
198 | class CreateGraphResponse(BaseResponse):
199 |     """Schema for the response body of the create graph endpoint."""
200 | 
201 |     namespace: str
202 |     message: str
203 | ```
204 | 
205 | ### `QueryGraphRequest`
206 | 
207 | ```python
208 | class QueryGraphRequest(BaseRequest):
209 |     """Schema for the request body of the query graph endpoint."""
210 | 
211 |     query: str
212 | ```
213 | 
214 | ### `QueryGraphResponse`
215 | 
216 | ```python
217 | class QueryGraphResponse(BaseResponse):
218 |     """Schema for the response body of the query graph endpoint."""
219 | 
220 |     namespace: str
221 |     answer: str
222 |     include_triples: bool = False
223 |     include_chunks: bool = False
224 | ```
225 | 
226 | ### `QueryGraphReturn`
227 | 
228 | ```python
229 | class QueryGraphReturn(BaseReturn):
230 |     """Schema for the return value of the query graph endpoint."""
231 | 
232 |     answer: str
233 | ```
234 | 
235 | ## Base Classes
236 | 
237 | The WhyHow API uses the following base classes for the API schemas:
238 | 
239 | ### `APIBase`
240 | 
241 | ```python
242 | class APIBase(BaseModel, ABC):
243 |     """Base class for API schemas."""
244 | 
245 |     model_config = ConfigDict(arbitrary_types_allowed=True)
246 | 
247 |     client: Client
248 |     prefix: str = ""
249 | ```
250 | 
251 | ### `AsyncAPIBase`
252 | 
253 | ```python
254 | class AsyncAPIBase(BaseModel, ABC):
255 |     """Base class for async API schemas."""
256 | 
257 |     model_config = ConfigDict(arbitrary_types_allowed=True)
258 | 
259 |     client: AsyncClient
260 |     prefix: str = ""
261 | ```


--------------------------------------------------------------------------------
/docs/index.md:
--------------------------------------------------------------------------------
 1 | # Welcome to the WhyHow Automated Knowledge Graph Creation SDK Documentation
 2 | 
 3 | ![Python 3.10](https://img.shields.io/badge/python-3.10-blue.svg)
 4 | ![Python 3.11](https://img.shields.io/badge/python-3.11-blue.svg)
 5 | 
 6 | The WhyHow Knowledge Graph Creation SDK enables you to quickly and easily build automated knowledge graphs tailored to your unique worldview. Instantly build, extend, and query well-scoped KGs using a raw PDF and simple seed concepts in natural language.
 7 | 
 8 | ## Key Features
 9 | 
10 | - Instantly build knowledge graphs from your documents and seed concepts (currently supports PDF files)
11 | - Simple API for querying your knowledge graphs using natural language
12 | - Bring your own keys for OpenAI, Pinecone, and Neo4j
13 | 
14 | ## Getting Started
15 | 
16 | 1. Install the package by following the [Installation Guide](installation.md)
17 | 2. Set up your OpenAI, Pinecone, and Neo4j credentials as environment variables
18 | 3. Initialize the SDK with your WhyHow API key
19 | 4. Create a namespace and add raw documents using `graph.add_documents()`
20 | 5. Create a graph for the namespace with `graph.create_graph()`, passing a list of seed concepts
21 | 6. Query the graph with natural language using `graph.query_graph()`
22 | 
23 | For a detailed walkthrough and code examples, check out the [Tutorial](tutorial.md).
24 | 


--------------------------------------------------------------------------------
/docs/installation.md:
--------------------------------------------------------------------------------
 1 | # Installation
 2 | 
 3 | ## Prerequisites
 4 | 
 5 | - Python 3.10 or higher
 6 | - OpenAI API key
 7 | - Pinecone API key
 8 | - Neo4j credentials (username, password, and URL)
 9 | 
10 | ## Install from PyPI
11 | 
12 | You can install the SDK directly from PyPI using pip:
13 | 
14 | ```shell
15 | pip install whyhow
16 | 
17 | export OPENAI_API_KEY=<your openai api key>
18 | export PINECONE_API_KEY=<your pinecone api key>
19 | export NEO4J_URL=<your neo4j url>
20 | export NEO4J_USER=<your neo4j username>
21 | export NEO4J_PASSWORD=<your neo4j password>
22 | ```
23 | 
24 | If using Azure OpenAI:
25 | 
26 | ```shell
27 | pip install whyhow
28 | 
29 | export AZURE_OPENAI_API_KEY=<your azure openai api key>
30 | export AZURE_OPENAI_API_VERSION=<your azure openai api version>
31 | export AZURE_OPENAI_ENDPOINT=<your azure openai api endpoint>
32 | export AZURE_OPENAI_MODEL_NAME=<your azure model name>
33 | export AZURE_OPENAI_EMBEDDING_NAME=<your azure embedding name>
34 | export PINECONE_API_KEY=<your pinecone api key>
35 | export NEO4J_URL=<your neo4j url>
36 | export NEO4J_USER=<your neo4j username>
37 | export NEO4J_PASSWORD=<your neo4j password>
38 | ```
39 | ## Install from GitHub
40 | 
41 | Alternatively, you can clone the repo and install the package
42 | 
43 | ```shell
44 | 
45 | git clone git@github.com:whyhow-ai/whyhow.git
46 | cd whyhow
47 | pip install .
48 | 
49 | export OPENAI_API_KEY=<your openai api key>
50 | export PINECONE_API_KEY=<your pinecone api key>
51 | export NEO4J_URL=<your neo4j url>
52 | export NEO4J_USER=<your neo4j username>
53 | export NEO4J_PASSWORD=<your neo4j password>
54 | ```
55 | 
56 | ## Troubleshooting
57 | 
58 | If you encounter any issues during installation, please check the following:
59 | 
60 | - Ensure that you have Python 3.10 or higher installed. You can check your Python version by running `python --version` in your terminal.
61 | - Make sure that you have correctly set the `OPENAI_API_KEY`, `PINECONE_API_KEY`, `NEO4J_URL`, `NEO4J_USER`, `NEO4J_PASSWORD` environment variables with your respective credentials.
62 | - If you are installing from GitHub, ensure that you have cloned the repository correctly and are in the right directory.
63 | - If you are using a virtual environment, make sure that it is activated before running the installation commands.
64 | - If you still face problems, please open an issue on the GitHub repository with detailed information about the error and your environment setup.
65 | 


--------------------------------------------------------------------------------
/docs/tutorial.md:
--------------------------------------------------------------------------------
  1 | # Tutorial
  2 | 
  3 | This is a straightforward tutorial on how to build and query a knowledge graph from PDF texts of the Harry Potter books with the WhyHow SDK. This example will guide you through importing documents from the Harry Potter series into the knowledge graph, then querying it for insights related to the series.
  4 | 
  5 | ## Environment Setup
  6 | 
  7 | Ensure you have the following:
  8 | 
  9 | - Python 3.10 or higher
 10 | - OpenAI API key
 11 | - Pinecone API key
 12 | - Neo4j credentials (username, password, and URL)
 13 | 
 14 | To keep your API keys and credentials secure, set them as environment variables. Open your terminal and run the following commands, substituting the placeholders with your actual data:
 15 | 
 16 | ```shell
 17 | export WHYHOW_API_KEY=<YOUR_WHYHOW_API_KEY>
 18 | export PINECONE_API_KEY=<YOUR_PINECONE_API_KEY>
 19 | export OPENAI_API_KEY=<YOUR_OPENAI_API_KEY>
 20 | export NEO4J_USER=<YOUR_NEO4J_USER>
 21 | export NEO4J_PASSWORD=<YOUR_NEO4J_PASSWORD>
 22 | export NEO4J_URL=<YOUR_NEO4J_DATABASE_URL>
 23 | ```
 24 | 
 25 | ## Install WhyHow SDK
 26 | 
 27 | If you haven't already, install the `WhyHow SDK` using pip:
 28 | 
 29 | ```shell
 30 | pip install whyhow
 31 | ```
 32 | 
 33 | ## Configure the WhyHow Client
 34 | 
 35 | With your environment variables set, you can now configure the `WhyHow` client in your Python script. The client will automatically read in your environment variables, or you can override those values by specifying them in the client config.
 36 | 
 37 | ```python
 38 | import os
 39 | from whyhow import WhyHow
 40 | 
 41 | client = WhyHow(api_key=<your WhyHow API key>)
 42 | ```
 43 | 
 44 | ## Option 1 - Create the Knowledge Graph from a schema
 45 | 
 46 | First, you need to define the namespace for your project and specify the paths to your Harry Potter book documents. Your namespace is a logical grouping of the raw data you upload, the schema you define, and the graphs you create. Namespaces are meant to be tightly scoped to your use case. You can create as many namespaces as you want.
 47 | 
 48 | ```python
 49 | namespace = "harry-potter"
 50 | documents = [
 51 |     "path/to/harry_potter_and_the_philosophers_stone.pdf",
 52 |     "path/to/harry_potter_and_the_chamber_of_secrets.pdf"
 53 |     # Add paths to the rest of the Harry Potter series documents
 54 | ]
 55 | 
 56 | # Add documents to your namespace
 57 | documents_response = client.graph.add_documents(namespace, documents)
 58 | print("Documents Added:", documents_response)
 59 | 
 60 | ```
 61 | 
 62 | Next, you must create a schema which defines the entities, relationships, and patterns you'd like to use to construct the graph. Create this and save it as a JSON file.
 63 | 
 64 | ```json
 65 | 
 66 | #schema.json
 67 | 
 68 | {
 69 |   "entities": [
 70 |     {
 71 |       "name": "character",
 72 |       "description": "A person appearing in the book, e.g., Harry Potter, Ron Weasley, Hermione Granger, Albus Dumbledore."
 73 |     },
 74 |     {
 75 |       "name": "object",
 76 |       "description": "Inanimate items that characters use or interact with, e.g., wand, Philosopher's Stone, Invisibility Cloak, broomstick."
 77 |     }
 78 |   ],
 79 |   "relations": [
 80 |     {
 81 |       "name": "friends with",
 82 |       "description": "Denotes a friendly relationship between characters."
 83 |     },
 84 |     {
 85 |       "name": "interacts with",
 86 |       "description": "Describes a scenario in which a character engages with another character, creature, or object."
 87 |     }
 88 |   ],
 89 |   "patterns": [
 90 |     {
 91 |       "head": "character",
 92 |       "relation": "friends with",
 93 |       "tail": "character",
 94 |       "description": "One character is friends with another, e.g., Harry Potter is friends with Ron Weasley."
 95 |     },
 96 |     {
 97 |       "head": "character",
 98 |       "relation": "interacts with",
 99 |       "tail": "object",
100 |       "description": "A character interacting with an object, e.g., Harry Potter interacts with the Invisibility Cloak."
101 |     }
102 |   ]
103 | }
104 | 
105 | ```
106 | 
107 | Then, create the graph using the schema and the uploaded documents:
108 | 
109 | ```python
110 | # Create graph from schema
111 | 
112 | schema = "./schema.json"
113 | create_graph_with_schema_response = client.graph.create_graph_from_schema(namespace, schema)
114 | print(create_graph_with_schema_response)
115 | # Creating your graph
116 | 
117 | ```
118 | 
119 | ## Option 2 - Create the Knowledge Graph from seed questions
120 | 
121 | Alternatively, you can create a graph using seed concepts in the form of questions written in natural language. We'll create a new namespace and upload the same data.
122 | 
123 | ```python
124 | namespace = "harry-potter-2"
125 | documents = [
126 |     "path/to/harry_potter_and_the_philosophers_stone.pdf",
127 |     "path/to/harry_potter_and_the_chamber_of_secrets.pdf"
128 |     # Add paths to the rest of the Harry Potter series documents
129 | ]
130 | 
131 | # Add documents to your namespace
132 | documents_response = client.graph.add_documents(namespace, documents)
133 | print("Documents Added:", documents_response)
134 | 
135 | ```
136 | 
137 | Create the knowledge graph from the seed questions and the uploaded documents:
138 | 
139 | ```python
140 | questions = ["What does Harry look like?","What does Hermione look like?","What does Ron look like?"]
141 | extracted_graph = client.graph.create_graph(namespace, questions)
142 | print("Extracted Graph:", extracted_graph)
143 | 
144 | ```
145 | 
146 | ## Option 3 - Create the Knowledge Graph from CSV
147 | 
148 | WhyHow also supports creating a graph from structured data in the form of a CSV file. Note that right now we only support creating a graph from one CSV file per namespace. If you upload more than one file, the first will be overwritten.
149 | 
150 | ```python
151 | namespace = "specialists"
152 | documents = ["../examples/assets/specialists.csv"]
153 | schema_file = "../examples/assets/specialists.json"
154 | 
155 | # Automatically generate a schema
156 | schema = client.graph.generate_schema(documents=documents)
157 | print(schema)
158 | 
159 | # Add documents to your namespace
160 | documents_response = client.graph.add_documents(
161 |     namespace=namespace, documents=documents)
162 | 
163 | ```
164 | 
165 | You can automatically generate a schema from a CSV document using the `generate_schema` method of the `GraphAPI` class.
166 | 
167 | ```python
168 | csv_documents = ["path/to/your/csv/file.csv"]
169 | generated_schema = client.graph.generate_schema(documents=csv_documents)
170 | print(generated_schema)
171 | ```
172 | 
173 | Use the `create_graph_from_csv` function to create a graph from the uploaded CSV file. The function will automatically use the schema provided to generate the graph.
174 | 
175 | ```python
176 | csv_graph = client.graph.create_graph_from_csv(
177 |     namespace=namespace, schema_file=schema_file
178 | )
179 | 
180 | print(csv_graph)
181 | 
182 | ```
183 | 
184 | ## Querying the Knowledge Graph
185 | 
186 | With the graphs created, you can now query them to find specific information:
187 | 
188 | ```python
189 | # Query the graph created from csv using specific entities and relations
190 | query = "Who speaks English and live in Houston?"
191 | entities = ["English","Houston"]
192 | relations = ["SPEAKS","LIVE_IN"]
193 | 
194 | specific_query_response = client.graph.query_graph_specific(
195 |     namespace=namespace,
196 |     query=query,
197 |     entities=entities,
198 |     relations=relations,
199 |     include_triples=False,
200 |     include_chunks=False,
201 | )
202 | 
203 | print("Specific Query Response:", specific_query_response)
204 | 
205 | # Query graph created from schema
206 | query = "Who is Harry friends with?"
207 | namespace = "harry-potter"
208 | schema_query_response = client.graph.query_graph(namespace, query)
209 | print("Query Response:", schema_query_response)
210 | 
211 | # Query graph created from seed questions
212 | query = "Who wears a Cloak?"
213 | namespace = "harry-potter-2"
214 | seed_questions_query_response = client.graph.query_graph(namespace, query)
215 | print("Query Response:", seed_questions_query_response)
216 | 
217 | # Include the triples in the return
218 | query = "Who is Harry friends with?"
219 | namespace = "harry-potter"
220 | schema_query_response = client.graph.query_graph(namespace, query, include_triples = True)
221 | print("Query Response:", schema_query_response)
222 | 
223 | # Include the chunk context in the return
224 | query = "Who is Harry friends with?"
225 | namespace = "harry-potter"
226 | schema_query_response = client.graph.query_graph(namespace, query, include_chunks = True)
227 | print("Query Response:", schema_query_response)
228 | ```
229 | 


--------------------------------------------------------------------------------
/examples/assets/specialists.csv:
--------------------------------------------------------------------------------
  1 | Name,Gender,Specialization,Subspecialties,Qualifications,Years of Experience,Hospital Affiliation,Department,City,State,Availability,Language 1,Language 2,Language 3,Consultation Fees,Patient Ratings,Insurance Accepted,
  2 | Allison Mcmillan,Female,Pediatrics,Sports Medicine,DO,11,Barnes-Wise,Outpatient,New York,NY,"Tuesday, 17:00 - 20:00",English,,,$406.00,3.1,PrimeHealth Assurance
  3 | Mr. Steven Brooks,Male,Gastroenterology,Pediatric,MD,7,Wright-Kim,Outpatient,Los Angeles,CA,"Tuesday, 16:00 - 20:00",English,,,$184.00,1.3,HarmonyHealth Plans
  4 | Ann Austin,Female,Psychiatry,Geriatric,MS,23,Lucero PLC,Emergency,Phoenix,AZ,"Tuesday, 9:00 - 18:00",English,French,,$206.00,3.7,GuardianHealth Solutions
  5 | Julie Johnson,Female,Dermatology,Sleep Medicine,MS,32,Gill PLC,Outpatient,Houston,TX,"Monday, 8:00 - 18:00",English,,,$273.00,2.8,VitalityCare Health
  6 | Christopher Brown,Male,Endocrinology,Addiction Medicine,MD,25,Murphy-Montgomery,Radiology,Chicago,IL,"Thursday, 17:00 - 18:00",English,Hindi,Spanish,$198.00,2.8,OptimumCare Health
  7 | Christopher Hamilton,Male,Cardiology,Sleep Medicine,MS,31,Jones PLC,Outpatient,Los Angeles,CA,"Wednesday, 11:00 - 18:00",English,Hindi,Mandarin,$350.00,2.7,LifeShield Insurance
  8 | Mark Ward,Male,Pediatrics,Addiction Medicine,MD,25,Meza-Preston,General Medicine,New York,NY,"Wednesday, 15:00 - 18:00",English,Hindi,,$204.00,2.3,EverCare Insurance
  9 | Julie Carey,Female,Dermatology,Sleep Medicine,MD,11,Griffin-Herring,Outpatient,Houston,TX,"Monday, 10:00 - 18:00",English,Spanish,Hindi,$438.00,3.8,GuardianHealth Solutions
 10 | Douglas Wells,Male,Dermatology,Pediatric,DO,8,Gregory-Gibbs,Outpatient,Phoenix,AZ,"Monday, 11:00 - 19:00",English,French,,$454.00,3.6,LifeShield Insurance
 11 | Richard Braun,Male,Dermatology,Sports Medicine,MD,28,Forbes-Walsh,Surgery,Houston,TX,"Tuesday, 14:00 - 19:00",English,Hindi,,$496.00,2.3,HealthFirst Coverage
 12 | Stephen Williamson,Male,Neurology,Addiction Medicine,MBBS,37,"Sherman, Mooney and Weber",Radiology,Houston,TX,"Monday, 9:00 - 20:00",English,,,$333.00,1.7,PremierWell Insurance
 13 | Michael White,Male,Pediatrics,Addiction Medicine,MD,16,"Sanchez, Roy and Wright",Radiology,Houston,TX,"Friday, 15:00 - 20:00",English,Hindi,Spanish,$205.00,4.7,LifeWell Coverage
 14 | Taylor Sanford,Female,Dermatology,Sleep Medicine,MBBS,11,Adams-Brown,Outpatient,Phoenix,AZ,"Wednesday, 16:00 - 20:00",English,Mandarin,French,$152.00,1.1,HarmonyHealth Plans
 15 | Brittney Reyes,Female,Gastroenterology,Sleep Medicine,MD,29,Jennings Group,Surgery,Los Angeles,CA,"Tuesday, 14:00 - 18:00",English,,,$230.00,4.9,EverCare Insurance
 16 | Eric Williams,Male,Oncology,Pediatric,MD,27,"Carr, Bush and Bush",General Medicine,Phoenix,AZ,"Thursday, 9:00 - 18:00",English,Hindi,,$170.00,3.5,HealthFirst Coverage
 17 | Robert Ramos,Male,Neurology,Sports Medicine,DO,14,Mendoza and Sons,General Medicine,Los Angeles,CA,"Friday, 17:00 - 19:00",English,Hindi,,$349.00,1.9,VitalityCare Health
 18 | Michelle Chen,Female,Dermatology,Addiction Medicine,MD,15,"Lee, Burke and Rivers",Emergency,Houston,TX,"Friday, 17:00 - 18:00",English,Spanish,Mandarin,$474.00,1.7,UnityWell Insurance
 19 | Taylor Baker,Male,Endocrinology,Geriatric,MS,27,Li-Nelson,Emergency,Houston,TX,"Thursday, 12:00 - 19:00",English,,,$298.00,1.7,SuperiorHealth Plans
 20 | Brendan Olson,Male,Dermatology,Geriatric,MS,5,"Torres, Mejia and Moore",Radiology,Los Angeles,CA,"Friday, 17:00 - 19:00",English,Mandarin,,$238.00,2.4,HealthGuard Assurance
 21 | David Meadows,Male,Neurology,Addiction Medicine,MD,25,Tran Ltd,General Medicine,Los Angeles,CA,"Wednesday, 8:00 - 19:00",English,,,$401.00,2.7,BrightWell Insurance
 22 | Matthew Gentry,Male,Endocrinology,Pediatric,MD,29,Rosario Ltd,Radiology,Houston,TX,"Monday, 15:00 - 18:00",English,,,$452.00,2,SuperiorHealth Plans
 23 | Sara Jimenez,Female,Endocrinology,Pediatric,DO,28,Edwards LLC,Radiology,Los Angeles,CA,"Thursday, 17:00 - 20:00",English,,,$115.00,4.2,TrueHealth Insurance
 24 | Rebekah Sparks,Female,Urology,Pediatric,MD,30,Jennings-Phillips,Surgery,Houston,TX,"Friday, 13:00 - 18:00",English,,,$371.00,2.4,PureHealth Insurance
 25 | Paul Miranda,Male,Psychiatry,Sports Medicine,MD,19,"Reid, Figueroa and Thomas",Emergency,Houston,TX,"Monday, 15:00 - 20:00",English,Mandarin,Spanish,$117.00,1.4,SuperiorHealth Plans,
 26 | Kendra Clark,Female,Orthopedics,Pediatric,DO,14,Stevens Inc,General Medicine,Los Angeles,CA,"Monday, 15:00 - 20:00",English,French,,$196.00,2.6,HarmonyHealth Plans,
 27 | Christina Bell,Female,Psychiatry,Pediatric,DO,38,"Anthony, Martinez and White",Outpatient,Houston,TX,"Friday, 9:00 - 20:00",English,Hindi,Spanish,$291.00,1.8,AdvantageCare Health,
 28 | Corey Moore,Male,Pediatrics,Sleep Medicine,MBBS,14,Leonard-Brown,Emergency,New York,NY,"Wednesday, 14:00 - 18:00",English,Spanish,Hindi,$325.00,1.1,TrueHealth Insurance,
 29 | Sean Wolf,Male,Dermatology,Geriatric,DO,25,Hicks PLC,Outpatient,Chicago,IL,"Wednesday, 16:00 - 19:00",English,Spanish,French,$425.00,3.3,OptimumCare Health,
 30 | Holly Martinez,Female,Pediatrics,Geriatric,DO,9,Solis Group,Outpatient,Phoenix,AZ,"Tuesday, 13:00 - 20:00",English,French,,$404.00,4.1,HealthFirst Coverage,
 31 | Karen Mcintyre,Female,Oncology,Addiction Medicine,MD,13,"Murphy, Cochran and Bush",Radiology,Los Angeles,CA,"Monday, 12:00 - 18:00",English,,,$367.00,3.8,PureHealth Insurance,
 32 | James Perez,Male,Pediatrics,Sports Medicine,DO,31,Peterson Ltd,Radiology,Chicago,IL,"Thursday, 17:00 - 19:00",English,Hindi,,$354.00,4.8,AdvantageCare Health,
 33 | Krystal Luna,Female,Gastroenterology,Sports Medicine,MS,20,Hicks-Santana,Radiology,Chicago,IL,"Friday, 11:00 - 20:00",English,,,$341.00,2.3,HarmonyHealth Plans,
 34 | Melissa Gilbert,Female,Gastroenterology,Geriatric,DO,24,Stein-Davis,Outpatient,Los Angeles,CA,"Thursday, 11:00 - 18:00",English,Spanish,Hindi,$213.00,2.9,GuardianHealth Solutions,
 35 | Jesse Levine,Male,Orthopedics,Sleep Medicine,MS,17,Jordan LLC,Emergency,Phoenix,AZ,"Tuesday, 13:00 - 19:00",English,,,$204.00,2.3,PremierWell Insurance,
 36 | Alex Hill,Male,Cardiology,Sports Medicine,MBBS,15,Ashley PLC,Emergency,Phoenix,AZ,"Thursday, 16:00 - 19:00",English,,,$187.00,3.3,HealthPlus Coverage,
 37 | Heather Miller,Female,Endocrinology,Addiction Medicine,MS,32,"Stout, Morales and Clark",General Medicine,New York,NY,"Friday, 8:00 - 20:00",English,,,$352.00,3.2,PremierWell Insurance,
 38 | Mathew Bauer,Male,Pediatrics,Sports Medicine,MD,17,Berry-Roberson,Surgery,Chicago,IL,"Thursday, 13:00 - 19:00",English,Hindi,,$350.00,3.2,HealthPlus Coverage,
 39 | Amber Lee,Female,Endocrinology,Sports Medicine,MS,18,Clay-Wood,Surgery,Houston,TX,"Wednesday, 9:00 - 20:00",English,French,,$275.00,1.2,EliteCare Insurers,
 40 | Mallory Ward,Female,Dermatology,Pediatric,MD,13,Nguyen-Burns,General Medicine,New York,NY,"Wednesday, 13:00 - 18:00",English,,,$276.00,4.3,TrueHealth Insurance,
 41 | James Lyons,Male,Pediatrics,Sports Medicine,MD,9,Smith-Huang,General Medicine,New York,NY,"Thursday, 11:00 - 18:00",English,Mandarin,Spanish,$242.00,3.9,PrimeHealth Assurance,
 42 | Sheryl Walker MD,Female,Psychiatry,Geriatric,MD,34,Thornton-Rivera,Radiology,Houston,TX,"Tuesday, 10:00 - 19:00",English,Hindi,Spanish,$279.00,4,PremierWell Insurance,
 43 | Jessica Boone,Female,Urology,Geriatric,MBBS,15,"Bradshaw, Cooley and Robinson",General Medicine,Los Angeles,CA,"Friday, 14:00 - 20:00",English,French,Hindi,$447.00,2.6,EverCare Insurance,
 44 | Melissa Robbins,Female,Urology,Sports Medicine,MBBS,28,"Stuart, Craig and Mcguire",Radiology,New York,NY,"Tuesday, 8:00 - 18:00",English,Mandarin,,$459.00,3.2,LifeShield Insurance,
 45 | Timothy Wheeler,Male,Pediatrics,Sleep Medicine,MBBS,30,Griffin PLC,Outpatient,New York,NY,"Thursday, 13:00 - 19:00",English,Hindi,,$274.00,3.8,EverCare Insurance,
 46 | Casey Scott,Male,Psychiatry,Sleep Medicine,DO,6,Griffith Inc,Outpatient,Chicago,IL,"Friday, 8:00 - 20:00",English,Spanish,,$236.00,3.4,PremierWell Insurance,
 47 | Teresa Powers,Female,Urology,Geriatric,DO,19,"Jefferson, Weiss and Foley",Radiology,Phoenix,AZ,"Friday, 16:00 - 20:00",English,French,Hindi,$208.00,3.7,GuardianHealth Solutions,
 48 | Kathryn Luna,Female,Cardiology,Sports Medicine,MD,12,"Hughes, Reyes and Flores",Surgery,New York,NY,"Monday, 11:00 - 18:00",English,French,,$455.00,3.6,HealthGuard Assurance,
 49 | Sara Jones,Female,Dermatology,Addiction Medicine,MD,12,"Pham, Anderson and Bell",Outpatient,New York,NY,"Wednesday, 8:00 - 20:00",English,Hindi,,$146.00,1.7,LifeWell Coverage,
 50 | Susan Wilson,Female,Endocrinology,Geriatric,MS,12,Patrick Ltd,Emergency,Chicago,IL,"Tuesday, 15:00 - 19:00",English,Spanish,,$309.00,1.3,LifeWell Coverage,
 51 | Brandon Jones,Male,Pediatrics,Addiction Medicine,MBBS,17,"Delacruz, Scott and Collins",Emergency,Los Angeles,CA,"Monday, 17:00 - 19:00",English,,,$246.00,2.6,SecureHealth Plans,
 52 | Nicole Hall,Female,Gastroenterology,Sleep Medicine,MBBS,34,Robinson-Patel,Surgery,Chicago,IL,"Tuesday, 17:00 - 20:00",English,Hindi,Spanish,$195.00,4.5,BrightWell Insurance,
 53 | Kristin Stephenson,Female,Endocrinology,Sleep Medicine,MS,26,Sandoval-Merritt,Emergency,Chicago,IL,"Monday, 8:00 - 19:00",English,,,$426.00,1.2,EliteCare Insurers,
 54 | Deborah Mclaughlin,Female,Dermatology,Geriatric,MBBS,20,Crawford-Mccann,General Medicine,Phoenix,AZ,"Tuesday, 14:00 - 18:00",English,French,Spanish,$496.00,1.6,BrightWell Insurance,
 55 | William Barron,Male,Dermatology,Sleep Medicine,MD,20,Olson Ltd,Emergency,Chicago,IL,"Wednesday, 13:00 - 20:00",English,Mandarin,,$477.00,3.7,HealthGuard Assurance,
 56 | Jonathon Schwartz,Male,Cardiology,Pediatric,MBBS,29,Flores-Walsh,General Medicine,New York,NY,"Thursday, 16:00 - 18:00",English,French,Spanish,$219.00,1.8,HealthNet Solutions,
 57 | Daniel Jenkins,Male,Psychiatry,Pediatric,MD,37,"Phillips, Mahoney and Levy",Outpatient,Chicago,IL,"Wednesday, 9:00 - 20:00",English,,,$468.00,3.4,HealthPlus Coverage,
 58 | Sean Holland,Male,Endocrinology,Addiction Medicine,DO,7,"Rodgers, Patel and Powell",Outpatient,Houston,TX,"Tuesday, 12:00 - 19:00",English,Spanish,French,$142.00,4.9,PrimeHealth Assurance,
 59 | Angela Whitaker,Female,Gastroenterology,Pediatric,MBBS,10,Carroll Ltd,Outpatient,Los Angeles,CA,"Friday, 9:00 - 19:00",English,Mandarin,,$465.00,4.1,EliteCare Insurers,
 60 | Melissa Williams,Female,Gastroenterology,Addiction Medicine,MBBS,37,Campbell Group,Emergency,Los Angeles,CA,"Thursday, 13:00 - 18:00",English,French,Spanish,$357.00,2.3,VitalityCare Health,
 61 | Denise Sheppard,Female,Psychiatry,Pediatric,MD,9,Norman and Sons,Outpatient,Los Angeles,CA,"Friday, 15:00 - 20:00",English,Mandarin,,$441.00,3.5,HealthGuard Assurance,
 62 | Seth Ford,Male,Gastroenterology,Addiction Medicine,MS,36,"Carter, Torres and Bates",General Medicine,New York,NY,"Monday, 13:00 - 19:00",English,,,$348.00,3.3,AdvantageCare Health,
 63 | Mary Young,Female,Pediatrics,Sleep Medicine,MD,35,"Garcia, Good and Ortiz",Emergency,Chicago,IL,"Thursday, 10:00 - 18:00",English,,,$167.00,2.3,SuperiorHealth Plans,
 64 | Bryan Horne,Male,Psychiatry,Sports Medicine,MD,25,Acevedo-Taylor,General Medicine,Chicago,IL,"Friday, 11:00 - 19:00",English,,,$497.00,4.3,VitalityCare Health,
 65 | Linda Vasquez,Female,Orthopedics,Addiction Medicine,MD,10,Colon PLC,Outpatient,Los Angeles,CA,"Thursday, 14:00 - 19:00",English,,,$252.00,3.8,LifeShield Insurance,
 66 | Troy Booth,Male,Gastroenterology,Geriatric,MD,12,Thompson-Austin,Surgery,Phoenix,AZ,"Wednesday, 8:00 - 18:00",English,Spanish,,$353.00,4.3,LifeWell Coverage,
 67 | Samantha Miller,Female,Cardiology,Addiction Medicine,DO,30,Yu-Weber,Outpatient,Chicago,IL,"Tuesday, 9:00 - 19:00",English,French,Mandarin,$164.00,4.3,HealthNet Solutions,
 68 | Carolyn Berg,Female,Endocrinology,Addiction Medicine,MS,31,"Pena, Carter and Bowen",Surgery,Chicago,IL,"Wednesday, 9:00 - 19:00",English,Mandarin,Spanish,$331.00,5,EverCare Insurance,
 69 | Kristen Bailey,Female,Oncology,Sports Medicine,MBBS,18,Parker-Young,General Medicine,Los Angeles,CA,"Tuesday, 8:00 - 18:00",English,,,$199.00,3.5,OptimumCare Health,
 70 | Shelby Jackson,Female,Pediatrics,Addiction Medicine,MD,22,Webb Ltd,Emergency,Phoenix,AZ,"Thursday, 11:00 - 19:00",English,Spanish,,$331.00,3.9,TrueHealth Insurance,
 71 | Edward Wright,Male,Orthopedics,Sports Medicine,MBBS,25,"Martin, Johnson and Kidd",General Medicine,New York,NY,"Friday, 15:00 - 20:00",English,Spanish,Mandarin,$212.00,3.9,SuperiorHealth Plans,
 72 | Tara Webb,Female,Pediatrics,Sports Medicine,DO,29,Wright-Martinez,Radiology,Los Angeles,CA,"Thursday, 13:00 - 20:00",English,,,$471.00,2,PremierWell Insurance,
 73 | Connie Rogers,Female,Urology,Pediatric,DO,11,Haas Ltd,Radiology,Phoenix,AZ,"Wednesday, 8:00 - 19:00",English,Hindi,,$496.00,2.5,HealthFirst Coverage,
 74 | Jason Acosta,Male,Orthopedics,Geriatric,MS,16,"Price, Thomas and Welch",General Medicine,Los Angeles,CA,"Monday, 17:00 - 18:00",English,Spanish,,$429.00,3.3,OptimumCare Health,
 75 | Danielle Jackson,Female,Gastroenterology,Addiction Medicine,MD,10,"Graham, Martinez and Gonzales",Surgery,Houston,TX,"Monday, 16:00 - 20:00",English,Mandarin,Hindi,$433.00,3.7,GuardianHealth Solutions,
 76 | Amanda Miller DVM,Female,Dermatology,Sleep Medicine,MBBS,32,Garcia Ltd,Outpatient,Phoenix,AZ,"Thursday, 8:00 - 20:00",English,Hindi,Spanish,$231.00,3.5,PremierWell Insurance,
 77 | Mr. Christopher Miller MD,Male,Psychiatry,Sports Medicine,DO,21,Jackson-Cummings,Emergency,Houston,TX,"Wednesday, 17:00 - 18:00",English,Mandarin,,$451.00,2.5,PrimeHealth Assurance,
 78 | Kevin Mason,Male,Neurology,Pediatric,DO,32,Brown-Miller,Outpatient,Phoenix,AZ,"Wednesday, 15:00 - 18:00",English,Mandarin,,$261.00,4.3,TotalCare Coverage,
 79 | Rebecca Simpson,Female,Neurology,Sports Medicine,MBBS,28,"Baldwin, Harris and Spencer",Surgery,Houston,TX,"Tuesday, 15:00 - 19:00",English,Spanish,,$129.00,4.3,UnityWell Insurance,
 80 | Thomas Kelly,Male,Oncology,Addiction Medicine,MBBS,36,Howell LLC,Radiology,Los Angeles,CA,"Friday, 8:00 - 20:00",English,Spanish,Hindi,$160.00,2.6,GuardianHealth Solutions,
 81 | Andrew Thomas,Male,Gastroenterology,Pediatric,MBBS,8,"Williams, Cox and Taylor",Emergency,Chicago,IL,"Thursday, 10:00 - 19:00",English,Hindi,French,$330.00,1.3,GuardianHealth Solutions,
 82 | Susan Rodriguez,Female,Gastroenterology,Addiction Medicine,MS,7,Kane-Phillips,Emergency,Los Angeles,CA,"Friday, 10:00 - 20:00",English,Mandarin,Spanish,$277.00,4.2,PureHealth Insurance,
 83 | Angela Payne,Female,Gastroenterology,Sleep Medicine,MD,38,Evans LLC,Emergency,Los Angeles,CA,"Wednesday, 17:00 - 18:00",English,French,,$419.00,1.5,HealthNet Solutions,
 84 | Kristie Potter,Female,Endocrinology,Geriatric,MD,25,Murphy LLC,Emergency,New York,NY,"Thursday, 12:00 - 18:00",English,Mandarin,,$235.00,4.7,HarmonyHealth Plans,
 85 | Sarah Hudson,Female,Oncology,Addiction Medicine,MBBS,20,"Porter, Barrera and Snyder",Surgery,Phoenix,AZ,"Thursday, 12:00 - 19:00",English,,,$403.00,4.8,TrueHealth Insurance,
 86 | Sue Whitaker,Female,Neurology,Addiction Medicine,MD,6,Kennedy-Newman,General Medicine,Chicago,IL,"Wednesday, 12:00 - 20:00",English,,,$440.00,1.5,SecureHealth Plans,
 87 | Cody Chavez,Male,Neurology,Pediatric,DO,35,Moore-James,Surgery,New York,NY,"Tuesday, 12:00 - 20:00",English,Spanish,,$112.00,4.9,HarmonyHealth Plans,
 88 | Thomas Smith,Male,Orthopedics,Geriatric,MD,31,Browning-Smith,General Medicine,Phoenix,AZ,"Tuesday, 12:00 - 20:00",English,Mandarin,,$104.00,2.1,LifeShield Insurance,
 89 | Samuel Moreno,Male,Psychiatry,Sleep Medicine,DO,29,"Velasquez, Zuniga and Reynolds",Emergency,Houston,TX,"Thursday, 11:00 - 19:00",English,Spanish,Mandarin,$324.00,2.2,LifeWell Coverage,
 90 | Elizabeth Bright,Female,Orthopedics,Sleep Medicine,MD,35,"Webb, Wong and Nichols",Emergency,New York,NY,"Thursday, 11:00 - 20:00",English,,,$102.00,3,VitalityCare Health,
 91 | William Williams,Male,Pediatrics,Sports Medicine,MBBS,21,Rangel-Ferrell,Emergency,Phoenix,AZ,"Monday, 11:00 - 20:00",English,Hindi,French,$289.00,2.8,TotalCare Coverage,
 92 | Christopher Huff,Male,Pediatrics,Addiction Medicine,MD,37,Smith and Sons,Emergency,Houston,TX,"Thursday, 17:00 - 18:00",English,Hindi,,$117.00,3.5,HealthFirst Coverage,
 93 | Richard Foster,Male,Cardiology,Sleep Medicine,DO,39,"Rodriguez, Thompson and Marquez",Surgery,Phoenix,AZ,"Monday, 12:00 - 19:00",English,French,Hindi,$479.00,1.2,UnityWell Insurance,
 94 | Sharon Gutierrez,Female,Urology,Addiction Medicine,MS,9,"Foster, Martin and Horn",General Medicine,Houston,TX,"Friday, 13:00 - 19:00",English,,,$189.00,1.4,EverCare Insurance,
 95 | Melissa Martin,Female,Dermatology,Sleep Medicine,MS,10,Walsh-Williams,General Medicine,Houston,TX,"Thursday, 15:00 - 19:00",English,French,,$244.00,4,GuardianHealth Solutions,
 96 | Dean Curtis,Male,Dermatology,Pediatric,MS,14,"Soto, Rodriguez and Stanley",Surgery,Los Angeles,CA,"Thursday, 16:00 - 18:00",English,,,$121.00,2.4,EverCare Insurance,
 97 | Jeremy King,Male,Pediatrics,Sleep Medicine,DO,17,Lewis-Reese,Outpatient,Phoenix,AZ,"Friday, 9:00 - 20:00",English,Mandarin,,$195.00,4,HealthGuard Assurance,
 98 | Paul Fisher,Male,Orthopedics,Geriatric,MBBS,31,"King, Adams and Mccormick",General Medicine,Chicago,IL,"Thursday, 14:00 - 19:00",English,Spanish,French,$458.00,2.9,HealthGuard Assurance,
 99 | Lori Vaughan,Female,Pediatrics,Sports Medicine,MD,24,Hunter-Chavez,Surgery,Houston,TX,"Tuesday, 12:00 - 19:00",English,Mandarin,Spanish,$472.00,2.8,UnityWell Insurance,
100 | Rhonda Evans,Female,Neurology,Sports Medicine,DO,29,Weaver-Clark,Surgery,Phoenix,AZ,"Wednesday, 12:00 - 18:00",English,Spanish,,$296.00,2.2,ProHealth Assurance,
101 | Jeremy Henson,Male,Oncology,Sleep Medicine,MD,39,Morrison-Ross,General Medicine,Chicago,IL,"Tuesday, 13:00 - 20:00",English,Mandarin,,$333.00,4.1,HealthFirst Coverage,


--------------------------------------------------------------------------------
/examples/assets/specialists.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "entities": [
  3 |     {
  4 |       "name": "Name",
  5 |       "set_type_as": "",
  6 |       "property_columns": [
  7 |         "Gender",
  8 |         "Qualifications",
  9 |         "Years of Experience",
 10 |         "Availability",
 11 |         "Consultation Fees",
 12 |         "Patient Ratings",
 13 |         "Insurance Accepted"
 14 |       ],
 15 |       "description": ""
 16 |     },
 17 |     {
 18 |       "name": "Specialization",
 19 |       "set_type_as": "Specialty",
 20 |       "property_columns": [],
 21 |       "description": ""
 22 |     },
 23 |     {
 24 |       "name": "Subspecialties",
 25 |       "set_type_as": "Specialty",
 26 |       "property_columns": [],
 27 |       "description": ""
 28 |     },
 29 |     {
 30 |       "name": "Hospital Affiliation",
 31 |       "set_type_as": "Hospital",
 32 |       "property_columns": [],
 33 |       "description": ""
 34 |     },
 35 |     {
 36 |       "name": "City",
 37 |       "set_type_as": "",
 38 |       "property_columns": [],
 39 |       "description": ""
 40 |     },
 41 |     {
 42 |       "name": "Language 1",
 43 |       "set_type_as": "Language",
 44 |       "property_columns": [],
 45 |       "description": ""
 46 |     },
 47 |     {
 48 |       "name": "Language 2",
 49 |       "set_type_as": "Language",
 50 |       "property_columns": [],
 51 |       "description": ""
 52 |     },
 53 |     {
 54 |       "name": "Language 3",
 55 |       "set_type_as": "Language",
 56 |       "property_columns": [],
 57 |       "description": ""
 58 |     }
 59 |   ],
 60 |   "patterns": [
 61 |     {
 62 |       "head": "Name",
 63 |       "relation": "specializes_in",
 64 |       "tail": "Specialization",
 65 |       "description": ""
 66 |     },
 67 |     {
 68 |       "head": "Name",
 69 |       "relation": "specializes_in",
 70 |       "tail": "Subspecialties",
 71 |       "description": ""
 72 |     },
 73 |     {
 74 |       "head": "Name",
 75 |       "relation": "works_at",
 76 |       "tail": "Hospital Affiliation",
 77 |       "description": ""
 78 |     },
 79 |     {
 80 |       "head": "Name",
 81 |       "relation": "live_in",
 82 |       "tail": "City",
 83 |       "description": ""
 84 |     },
 85 |     {
 86 |       "head": "Name",
 87 |       "relation": "speaks",
 88 |       "tail": "Language 1",
 89 |       "description": ""
 90 |     },
 91 |     {
 92 |       "head": "Name",
 93 |       "relation": "speaks",
 94 |       "tail": "Language 2",
 95 |       "description": ""
 96 |     },
 97 |     {
 98 |       "head": "Name",
 99 |       "relation": "speaks",
100 |       "tail": "Language 3",
101 |       "description": ""
102 |     }
103 |   ]
104 | }
105 | 


--------------------------------------------------------------------------------
/examples/create_graph_from_csv.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Create a knowledge graph with a CSV and a preset schema"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": null,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "# Import the required libraries\n",
 17 |     "\n",
 18 |     "import os\n",
 19 |     "import json\n",
 20 |     "\n",
 21 |     "from dotenv import load_dotenv\n",
 22 |     "\n",
 23 |     "load_dotenv()"
 24 |    ]
 25 |   },
 26 |   {
 27 |    "cell_type": "code",
 28 |    "execution_count": null,
 29 |    "metadata": {},
 30 |    "outputs": [],
 31 |    "source": [
 32 |     "# Import whyhow\n",
 33 |     "from whyhow import WhyHow"
 34 |    ]
 35 |   },
 36 |   {
 37 |    "cell_type": "code",
 38 |    "execution_count": null,
 39 |    "metadata": {},
 40 |    "outputs": [],
 41 |    "source": [
 42 |     "# Initialize the whyhow client\n",
 43 |     "client = WhyHow(\n",
 44 |     "    api_key = os.environ.get(\"WHYHOW_API_KEY\"),\n",
 45 |     "    openai_api_key=os.getenv(\"OPENAI_API_KEY\"),\n",
 46 |     "    pinecone_api_key=os.getenv(\"PINECONE_API_KEY\"),\n",
 47 |     "    neo4j_url=os.getenv(\"NEO4J_URI\"),\n",
 48 |     "    neo4j_user=os.getenv(\"NEO4J_USER\"),\n",
 49 |     "    neo4j_password=os.getenv(\"NEO4J_PASSWORD\"),\n",
 50 |     ")"
 51 |    ]
 52 |   },
 53 |   {
 54 |    "cell_type": "code",
 55 |    "execution_count": null,
 56 |    "metadata": {},
 57 |    "outputs": [],
 58 |    "source": [
 59 |     "namespace = \"whyhow_csv\"\n",
 60 |     "documents = [\"../examples/assets/specialists.csv\"]\n",
 61 |     "schema_file = \"../examples/assets/specialists.json\""
 62 |    ]
 63 |   },
 64 |   {
 65 |    "cell_type": "markdown",
 66 |    "metadata": {},
 67 |    "source": [
 68 |     "### Generate a schema\n",
 69 |     "With CSVs, you can automatically generate a schema for your document. The schema will automatically include all column names. You can modify the output to make the schema your own "
 70 |    ]
 71 |   },
 72 |   {
 73 |    "cell_type": "code",
 74 |    "execution_count": null,
 75 |    "metadata": {},
 76 |    "outputs": [],
 77 |    "source": [
 78 |     "# Note, you can only generate a schema from a csv file and only one csv file at a time\n",
 79 |     "\n",
 80 |     "schema = client.graph.generate_schema(documents=documents)\n",
 81 |     "print(schema)"
 82 |    ]
 83 |   },
 84 |   {
 85 |    "cell_type": "markdown",
 86 |    "metadata": {},
 87 |    "source": [
 88 |     "### Add documents to namespace\n",
 89 |     "Upload one CSV file to the namespace. In this version, you can only upload one CSV file to a namespace so as not to confuse the schema."
 90 |    ]
 91 |   },
 92 |   {
 93 |    "cell_type": "code",
 94 |    "execution_count": null,
 95 |    "metadata": {},
 96 |    "outputs": [],
 97 |    "source": [
 98 |     "# Add csv\n",
 99 |     "documents_response = client.graph.add_documents(\n",
100 |     "    namespace=namespace, documents=documents)\n"
101 |    ]
102 |   },
103 |   {
104 |    "cell_type": "markdown",
105 |    "metadata": {},
106 |    "source": [
107 |     "### Create graph from CSV\n",
108 |     "Use the `create_graph_from_csv` function to create a graph from the uploaded CSV file. The function will automatically use the schema provided to generate the graph."
109 |    ]
110 |   },
111 |   {
112 |    "cell_type": "code",
113 |    "execution_count": null,
114 |    "metadata": {},
115 |    "outputs": [],
116 |    "source": [
117 |     "csv_graph = client.graph.create_graph_from_csv(\n",
118 |     "    namespace=namespace, schema_file=schema_file\n",
119 |     ")\n",
120 |     "\n",
121 |     "print(csv_graph)"
122 |    ]
123 |   },
124 |   {
125 |    "cell_type": "markdown",
126 |    "metadata": {},
127 |    "source": [
128 |     "### Query the graph"
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "code",
133 |    "execution_count": null,
134 |    "metadata": {},
135 |    "outputs": [],
136 |    "source": [
137 |     "# Query the graph using a natural language query\n",
138 |     "query = \"Who speaks English and lives in Houston?\"\n",
139 |     "\n",
140 |     "query_response = client.graph.query_graph(\n",
141 |     "    namespace=namespace, query=query, include_triples=True)\n",
142 |     "\n",
143 |     "print(query_response)"
144 |    ]
145 |   },
146 |   {
147 |    "cell_type": "markdown",
148 |    "metadata": {},
149 |    "source": [
150 |     "### Query the graph with specific entities and relations"
151 |    ]
152 |   },
153 |   {
154 |    "cell_type": "code",
155 |    "execution_count": null,
156 |    "metadata": {},
157 |    "outputs": [],
158 |    "source": [
159 |     "# Query the graph using specific entities and relations\n",
160 |     "query = \"Who speaks English and live in Houston?\"\n",
161 |     "entities = [\"English\",\"Houston\"]\n",
162 |     "relations = [\"SPEAKS\", \"LIVE_IN\"]\n",
163 |     "\n",
164 |     "specific_query_response = client.graph.query_graph_specific(\n",
165 |     "    namespace=namespace, \n",
166 |     "    query=query, \n",
167 |     "    entities=entities, \n",
168 |     "    relations=relations,\n",
169 |     "    include_triples=False,\n",
170 |     "    include_chunks=False,\n",
171 |     ")\n",
172 |     "\n",
173 |     "print(specific_query_response)"
174 |    ]
175 |   }
176 |  ],
177 |  "metadata": {
178 |   "kernelspec": {
179 |    "display_name": "venv",
180 |    "language": "python",
181 |    "name": "python3"
182 |   },
183 |   "language_info": {
184 |    "codemirror_mode": {
185 |     "name": "ipython",
186 |     "version": 3
187 |    },
188 |    "file_extension": ".py",
189 |    "mimetype": "text/x-python",
190 |    "name": "python",
191 |    "nbconvert_exporter": "python",
192 |    "pygments_lexer": "ipython3",
193 |    "version": "3.11.5"
194 |   }
195 |  },
196 |  "nbformat": 4,
197 |  "nbformat_minor": 2
198 | }
199 | 


--------------------------------------------------------------------------------
/examples/create_graph_from_questions.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Create a knowledge graph with questions"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": null,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import os\n",
 17 |     "\n",
 18 |     "from dotenv import load_dotenv\n",
 19 |     "\n",
 20 |     "load_dotenv()"
 21 |    ]
 22 |   },
 23 |   {
 24 |    "cell_type": "code",
 25 |    "execution_count": null,
 26 |    "metadata": {},
 27 |    "outputs": [],
 28 |    "source": [
 29 |     "from whyhow import WhyHow"
 30 |    ]
 31 |   },
 32 |   {
 33 |    "cell_type": "code",
 34 |    "execution_count": null,
 35 |    "metadata": {},
 36 |    "outputs": [],
 37 |    "source": [
 38 |     "client = WhyHow(\n",
 39 |     "    api_key = os.environ.get(\"WHYHOW_API_KEY\"),\n",
 40 |     "    openai_api_key=os.getenv(\"OPENAI_API_KEY\"),\n",
 41 |     "    pinecone_api_key=os.getenv(\"PINECONE_API_KEY\"),\n",
 42 |     "    neo4j_url=os.getenv(\"NEO4J_URI\"),\n",
 43 |     "    neo4j_user=os.getenv(\"NEO4J_USER\"),\n",
 44 |     "    neo4j_password=os.getenv(\"NEO4J_PASSWORD\"),\n",
 45 |     ")"
 46 |    ]
 47 |   },
 48 |   {
 49 |    "cell_type": "code",
 50 |    "execution_count": null,
 51 |    "metadata": {},
 52 |    "outputs": [],
 53 |    "source": [
 54 |     "namespace = \"\"\n",
 55 |     "documents = []\n",
 56 |     "questions = []"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "markdown",
 61 |    "metadata": {},
 62 |    "source": [
 63 |     "### Add documents to database"
 64 |    ]
 65 |   },
 66 |   {
 67 |    "cell_type": "code",
 68 |    "execution_count": null,
 69 |    "metadata": {},
 70 |    "outputs": [],
 71 |    "source": [
 72 |     "# Add documents\n",
 73 |     "documents_response = client.graph.add_documents(namespace, documents)\n",
 74 |     "print(documents_response)"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "markdown",
 79 |    "metadata": {},
 80 |    "source": [
 81 |     "### Create the graph"
 82 |    ]
 83 |   },
 84 |   {
 85 |    "cell_type": "code",
 86 |    "execution_count": null,
 87 |    "metadata": {},
 88 |    "outputs": [],
 89 |    "source": [
 90 |     "# Create a graph\n",
 91 |     "extracted_graph = client.graph.create_graph(\n",
 92 |     "    namespace = namespace, \n",
 93 |     "    questions = questions\n",
 94 |     ")\n",
 95 |     "\n",
 96 |     "print(extracted_graph)"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "markdown",
101 |    "metadata": {},
102 |    "source": [
103 |     "### Query the graph"
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "code",
108 |    "execution_count": null,
109 |    "metadata": {},
110 |    "outputs": [],
111 |    "source": [
112 |     "# Query the graph\n",
113 |     "query = \"\"\n",
114 |     "query_response = client.graph.query_graph(\n",
115 |     "    namespace = namespace, \n",
116 |     "    query = query\n",
117 |     ")\n",
118 |     "\n",
119 |     "print(query_response)"
120 |    ]
121 |   }
122 |  ],
123 |  "metadata": {
124 |   "kernelspec": {
125 |    "display_name": "venv",
126 |    "language": "python",
127 |    "name": "python3"
128 |   },
129 |   "language_info": {
130 |    "codemirror_mode": {
131 |     "name": "ipython",
132 |     "version": 3
133 |    },
134 |    "file_extension": ".py",
135 |    "mimetype": "text/x-python",
136 |    "name": "python",
137 |    "nbconvert_exporter": "python",
138 |    "pygments_lexer": "ipython3",
139 |    "version": "3.10.13"
140 |   }
141 |  },
142 |  "nbformat": 4,
143 |  "nbformat_minor": 2
144 | }
145 | 


--------------------------------------------------------------------------------
/examples/create_graph_from_schema.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "markdown",
  5 |    "metadata": {},
  6 |    "source": [
  7 |     "# Create a knowledge graph with a preset schema"
  8 |    ]
  9 |   },
 10 |   {
 11 |    "cell_type": "code",
 12 |    "execution_count": null,
 13 |    "metadata": {},
 14 |    "outputs": [],
 15 |    "source": [
 16 |     "import os\n",
 17 |     "import json\n",
 18 |     "\n",
 19 |     "from dotenv import load_dotenv\n",
 20 |     "\n",
 21 |     "load_dotenv()"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": null,
 27 |    "metadata": {},
 28 |    "outputs": [],
 29 |    "source": [
 30 |     "from whyhow import WhyHow"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "code",
 35 |    "execution_count": null,
 36 |    "metadata": {},
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "client = WhyHow(\n",
 40 |     "    api_key = os.environ.get(\"WHYHOW_API_KEY\"),\n",
 41 |     "    openai_api_key=os.getenv(\"OPENAI_API_KEY\"),\n",
 42 |     "    pinecone_api_key=os.getenv(\"PINECONE_API_KEY\"),\n",
 43 |     "    neo4j_url=os.getenv(\"NEO4J_URI\"),\n",
 44 |     "    neo4j_user=os.getenv(\"NEO4J_USER\"),\n",
 45 |     "    neo4j_password=os.getenv(\"NEO4J_PASSWORD\"),\n",
 46 |     ")"
 47 |    ]
 48 |   },
 49 |   {
 50 |    "cell_type": "code",
 51 |    "execution_count": null,
 52 |    "metadata": {},
 53 |    "outputs": [],
 54 |    "source": [
 55 |     "documents = []\n",
 56 |     "namespace = \"\"\n",
 57 |     "schema_file = \"\""
 58 |    ]
 59 |   },
 60 |   {
 61 |    "cell_type": "markdown",
 62 |    "metadata": {},
 63 |    "source": [
 64 |     "### Add documents to database"
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "code",
 69 |    "execution_count": null,
 70 |    "metadata": {},
 71 |    "outputs": [],
 72 |    "source": [
 73 |     "# Add documents\n",
 74 |     "documents_response = client.graph.add_documents(\n",
 75 |     "    namespace=namespace, documents=documents)\n",
 76 |     "\n",
 77 |     "print(documents_response)"
 78 |    ]
 79 |   },
 80 |   {
 81 |    "cell_type": "markdown",
 82 |    "metadata": {},
 83 |    "source": [
 84 |     "### Create the graph"
 85 |    ]
 86 |   },
 87 |   {
 88 |    "cell_type": "code",
 89 |    "execution_count": null,
 90 |    "metadata": {},
 91 |    "outputs": [],
 92 |    "source": [
 93 |     "# Create a graph\n",
 94 |     "extracted_graph = client.graph.create_graph_from_schema(\n",
 95 |     "    namespace=namespace, schema_file=schema_file\n",
 96 |     ")\n",
 97 |     "\n",
 98 |     "print(extracted_graph)"
 99 |    ]
100 |   },
101 |   {
102 |    "cell_type": "markdown",
103 |    "metadata": {},
104 |    "source": [
105 |     "### Query the graph"
106 |    ]
107 |   },
108 |   {
109 |    "cell_type": "code",
110 |    "execution_count": null,
111 |    "metadata": {},
112 |    "outputs": [],
113 |    "source": [
114 |     "# Query the graph\n",
115 |     "query = \"\"\n",
116 |     "\n",
117 |     "entities = [\"\"]\n",
118 |     "relations = [\"\"]\n",
119 |     "\n",
120 |     "query_response = client.graph.query_graph_specific(\n",
121 |     "    namespace=namespace, \n",
122 |     "    query=query,\n",
123 |     "    entities=entities,\n",
124 |     "    relations=relations,\n",
125 |     "    include_triples=False,\n",
126 |     "    include_chunks=False\n",
127 |     ")\n",
128 |     "\n",
129 |     "print(query_response.answer)"
130 |    ]
131 |   }
132 |  ],
133 |  "metadata": {
134 |   "kernelspec": {
135 |    "display_name": "venv",
136 |    "language": "python",
137 |    "name": "python3"
138 |   },
139 |   "language_info": {
140 |    "codemirror_mode": {
141 |     "name": "ipython",
142 |     "version": 3
143 |    },
144 |    "file_extension": ".py",
145 |    "mimetype": "text/x-python",
146 |    "name": "python",
147 |    "nbconvert_exporter": "python",
148 |    "pygments_lexer": "ipython3",
149 |    "version": "3.10.13"
150 |   }
151 |  },
152 |  "nbformat": 4,
153 |  "nbformat_minor": 2
154 | }
155 | 


--------------------------------------------------------------------------------
/mkdocs.yml:
--------------------------------------------------------------------------------
 1 | site_name: WhyHow SDK
 2 | nav:
 3 |   - Home: index.md
 4 |   - Installation: installation.md
 5 |   - Tutorial: tutorial.md
 6 |   - API Documentation: api.md
 7 | 
 8 | theme:
 9 |   name: material
10 |   palette:
11 |     scheme: slate
12 |   features:
13 |     - content.code.copy
14 |     - search.suggest
15 |     - search.highlight
16 |     - toc.follow
17 | 
18 | plugins:
19 |   - search
20 |   - mkdocstrings:
21 |       handlers:
22 |         python:
23 |           options:
24 |             docstring_style: numpy
25 |             show_root_heading: true
26 | 
27 | markdown_extensions:
28 |   - toc:
29 |       permalink: true
30 |       toc_depth: 3
31 |   - admonition
32 |   - tables
33 |   - pymdownx.details
34 |   - pymdownx.highlight:
35 |       anchor_linenums: true
36 |       line_spans: __span
37 |       pygments_lang_class: true
38 |   - pymdownx.inlinehilite
39 |   - pymdownx.snippets
40 |   - pymdownx.superfences
41 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
  1 | [build-system]
  2 | requires = ["setuptools>=61.2"]
  3 | build-backend = "setuptools.build_meta"
  4 | 
  5 | [project]
  6 | name = "whyhow"
  7 | authors = [
  8 |     {name = "Tom Smoker", email = "tom@whyhow.ai"},
  9 |     {name = "Chris Rec", email = "chris@whyhow.ai"}
 10 | ]
 11 | description = "Whyhow automated KG SDK"
 12 | keywords = ["SDK", "KG"]
 13 | classifiers = ["Programming Language :: Python :: 3"]
 14 | requires-python = ">=3.10"
 15 | dependencies = [
 16 |     "httpx",
 17 |     "pydantic>1",
 18 | ]
 19 | dynamic = ["version"]
 20 | readme = "README.md"
 21 | license = {text = "MIT"}
 22 | 
 23 | [project.optional-dependencies]
 24 | dev = [
 25 |     "bandit[toml]",
 26 |     "black",
 27 |     "flake8",
 28 |     "flake8-docstrings",
 29 |     "fpdf",
 30 |     "isort",
 31 |     "mypy",
 32 |     "pydocstyle[toml]",
 33 |     "pytest-asyncio",
 34 |     "pytest-cov",
 35 |     "pytest-httpx",
 36 |     "pytest",
 37 | ]
 38 | docs = [
 39 |     "mkdocs",
 40 |     "mkdocstrings[python]",
 41 |     "mkdocs-material",
 42 |     "pymdown-extensions",
 43 | ]
 44 | 
 45 | [project.urls]
 46 | Homepage = "https://github.com/whyhow-ai/whyhow"
 47 | 
 48 | [project.scripts]
 49 | 
 50 | [tool.setuptools]
 51 | zip-safe = false
 52 | include-package-data = true
 53 | package-dir = {"" = "src"}
 54 | 
 55 | [tool.setuptools.packages.find]
 56 | where = ["src"]
 57 | namespaces = false
 58 | 
 59 | [tool.setuptools.package-data]
 60 | "*" = ["*.txt", "*.rst", "*.typed"]
 61 | 
 62 | [tool.setuptools.dynamic]
 63 | version = {attr = "whyhow.__version__"}
 64 | 
 65 | [tool.pydocstyle]
 66 | convention = "numpy"
 67 | add-ignore = "D301"
 68 | 
 69 | [tool.bandit]
 70 | 
 71 | [tool.black]
 72 | line-length = 79
 73 | preview = true
 74 | 
 75 | [tool.isort]
 76 | profile = "black"
 77 | line_length = 79
 78 | 
 79 | [tool.mypy]
 80 | plugins = [
 81 |   "pydantic.mypy"
 82 | ]
 83 | python_version = "3.10"
 84 | ignore_missing_imports = true
 85 | no_implicit_optional = true
 86 | check_untyped_defs = true
 87 | strict_equality = true
 88 | warn_redundant_casts = true
 89 | warn_unused_ignores = true
 90 | show_error_codes = true
 91 | disallow_any_generics = true
 92 | disallow_incomplete_defs = true
 93 | disallow_untyped_defs = true
 94 | 
 95 | [tool.pydantic-mypy]
 96 | init_forbid_extra = true
 97 | init_typed = true
 98 | warn_required_dynamic_aliases = true
 99 | 
100 | [tool.pytest.ini_options]
101 | filterwarnings = [
102 |     "error",
103 |     "ignore:There is no current event loop",
104 | ]
105 | testpaths = [
106 |     "tests",
107 | ]
108 | addopts = "--cov=src/ -v --cov-report=term-missing --durations=20"
109 | log_cli = false
110 | 


--------------------------------------------------------------------------------
/src/whyhow/__init__.py:
--------------------------------------------------------------------------------
1 | """WhyHow SDK."""
2 | 
3 | from whyhow.client import AsyncWhyHow, WhyHow
4 | 
5 | __version__ = "v0.0.7"
6 | __all__ = ["AsyncWhyHow", "WhyHow"]
7 | 


--------------------------------------------------------------------------------
/src/whyhow/apis/__init__.py:
--------------------------------------------------------------------------------
1 | """Actual implementation of sending requests."""
2 | 


--------------------------------------------------------------------------------
/src/whyhow/apis/base.py:
--------------------------------------------------------------------------------
 1 | """Base classes for API schemas."""
 2 | 
 3 | from abc import ABC
 4 | 
 5 | from httpx import AsyncClient, Client
 6 | from pydantic import BaseModel, ConfigDict
 7 | 
 8 | 
class APIBase(BaseModel, ABC):
    """Base class for API schemas.

    Bundles the synchronous httpx client with the URL prefix that
    subclasses use when building endpoint paths.
    """

    # The httpx client is not a pydantic type, so arbitrary field types
    # have to be allowed explicitly.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Synchronous HTTP client used to send the requests.
    client: Client
    # String placed in front of endpoint paths; empty by default.
    prefix: str = ""
16 | 
17 | 
class AsyncAPIBase(BaseModel, ABC):
    """Base class for async API schemas.

    Asynchronous counterpart of the synchronous base: same fields, but
    the client is an ``httpx.AsyncClient``.
    """

    # The httpx client is not a pydantic type, so arbitrary field types
    # have to be allowed explicitly.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Asynchronous HTTP client used to send the requests.
    client: AsyncClient
    # String placed in front of endpoint paths; empty by default.
    prefix: str = ""
25 | 


--------------------------------------------------------------------------------
/src/whyhow/apis/graph.py:
--------------------------------------------------------------------------------
  1 | """Interacting with the graph API."""
  2 | 
  3 | import csv
  4 | import json
  5 | import os
  6 | from pathlib import Path
  7 | 
  8 | from whyhow.apis.base import APIBase
  9 | from whyhow.schemas.common import Schema as SchemaModel
 10 | from whyhow.schemas.graph import (
 11 |     AddDocumentsResponse,
 12 |     CreateGraphResponse,
 13 |     CreateQuestionGraphRequest,
 14 |     CreateSchemaGraphRequest,
 15 |     QueryGraphRequest,
 16 |     QueryGraphResponse,
 17 |     SpecificQueryGraphRequest,
 18 |     SpecificQueryGraphResponse,
 19 | )
 20 | 
 21 | 
 22 | class GraphAPI(APIBase):
 23 |     """Interacting with the graph API synchronously."""
 24 | 
 25 |     def add_documents(self, namespace: str, documents: list[str]) -> str:
 26 |         """Add documents to the graph.
 27 | 
 28 |         Parameters
 29 |         ----------
 30 |         namespace : str
 31 |             The namespace of the graph.
 32 | 
 33 |         documents : list[str]
 34 |             The documents to add.
 35 |         """
 36 |         if not documents:
 37 |             raise ValueError("No documents provided")
 38 | 
 39 |         document_paths = [Path(document) for document in documents]
 40 |         if not all(document_path.exists() for document_path in document_paths):
 41 |             raise ValueError("Not all documents exist")
 42 | 
 43 |         if not all(
 44 |             document_path.suffix in [".pdf", ".csv"]
 45 |             for document_path in document_paths
 46 |         ):
 47 |             raise ValueError("Only PDFs and CSVs are supported")
 48 | 
 49 |         if (
 50 |             sum(
 51 |                 os.path.getsize(document_path)
 52 |                 for document_path in document_paths
 53 |             )
 54 |             > 8388600
 55 |         ):
 56 |             raise ValueError(
 57 |                 "PDFs too large, please limit your total upload size to <8MB."
 58 |             )
 59 | 
 60 |         if any(
 61 |             document_path.suffix == ".csv" for document_path in document_paths
 62 |         ):
 63 |             if len(document_paths) > 1:
 64 |                 raise ValueError(
 65 |                     "Too many documents"
 66 |                     "Please limit CSV uploads to 1 file during the beta."
 67 |                 )
 68 | 
 69 |         if len(document_paths) > 3:
 70 |             raise ValueError(
 71 |                 "Too many documents"
 72 |                 "Please limit PDF uploads to 3 files during the beta."
 73 |             )
 74 | 
 75 |         files = [
 76 |             (
 77 |                 "documents",
 78 |                 (document_path.name, open(document_path, "rb")),
 79 |             )
 80 |             for document_path in document_paths
 81 |         ]
 82 | 
 83 |         raw_response = self.client.post(
 84 |             f"{self.prefix}/{namespace}/add_documents",
 85 |             files=files,
 86 |         )
 87 | 
 88 |         raw_response.raise_for_status()
 89 | 
 90 |         response = AddDocumentsResponse.model_validate(raw_response.json())
 91 | 
 92 |         return response.message
 93 | 
 94 |     def generate_schema(self, documents: list[str]) -> str:
 95 |         """Generate a schema from CSV document."""
 96 |         if not documents:
 97 |             raise ValueError("No documents provided")
 98 | 
 99 |         document_paths = [Path(document) for document in documents]
100 |         if not all(document_path.exists() for document_path in document_paths):
101 |             raise ValueError("Not all documents exist")
102 | 
103 |         if not all(
104 |             document_path.suffix in [".csv"]
105 |             for document_path in document_paths
106 |         ):
107 |             raise ValueError(
108 |                 "Only CSVs are supported"
109 |                 "for local schema generation right now."
110 |             )
111 | 
112 |         if any(
113 |             document_path.suffix == ".csv" for document_path in document_paths
114 |         ):
115 |             if len(document_paths) > 1:
116 |                 raise ValueError(
117 |                     "Too many documents"
118 |                     "can only generate schema for one document at a time."
119 |                 )
120 |         entities = []
121 |         patterns = []
122 | 
123 |         with open(document_paths[0], newline="", encoding="utf-8-sig") as f:
124 |             reader = csv.reader(f)
125 |             for row in reader:
126 |                 for i in range(len(row) - 1):
127 |                     _pattern = {
128 |                         "head": row[0],
129 |                         "relation": (
130 |                             f"has_{row[i+1].lower().replace(' ', '_')}"
131 |                         ),
132 |                         "tail": row[i + 1],
133 |                         "description": "",
134 |                     }
135 |                     patterns.append(_pattern)
136 |                 for i in range(len(row)):
137 |                     _entity = {
138 |                         "name": row[i],
139 |                         "set_type_as": "",
140 |                         "property_columns": [],
141 |                         "description": "",
142 |                     }
143 |                     entities.append(_entity)
144 |                 break
145 | 
146 |         return json.dumps(
147 |             {"entities": entities, "patterns": patterns}, indent=4
148 |         )
149 | 
150 |     def create_graph(self, namespace: str, questions: list[str]) -> str:
151 |         """Create a new graph.
152 | 
153 |         Parameters
154 |         ----------
155 |         namespace : str
156 |             The namespace of the graph to create.
157 |         questions : list[str]
158 |             The seed concepts to initialize the graph with.
159 |         """
160 |         if not questions:
161 |             raise ValueError("No questions provided")
162 | 
163 |         request_body = CreateQuestionGraphRequest(questions=questions)
164 | 
165 |         raw_response = self.client.post(
166 |             f"{self.prefix}/{namespace}/create_graph",
167 |             json=request_body.model_dump(),
168 |         )
169 | 
170 |         raw_response.raise_for_status()
171 | 
172 |         response = CreateGraphResponse.model_validate(raw_response.json())
173 | 
174 |         return response.message
175 | 
176 |     def create_graph_from_schema(
177 |         self, namespace: str, schema_file: str
178 |     ) -> str:
179 |         """Create a new graph based on a user-defined schema.
180 | 
181 |         Parameters
182 |         ----------
183 |         namespace : str
184 |             The namespace of the graph to create.
185 |         schema_file : str
186 |             The schema file to use to build the graph.
187 |         """
188 |         if not schema_file:
189 |             raise ValueError("No schema provided")
190 | 
191 |         with open(schema_file, "r") as file:
192 |             schema_data = json.load(file)
193 | 
194 |         schema_model = SchemaModel(**schema_data)
195 | 
196 |         request_body = CreateSchemaGraphRequest(graph_schema=schema_model)
197 | 
198 |         raw_response = self.client.post(
199 |             f"{self.prefix}/{namespace}/create_graph_from_schema",
200 |             json=request_body.model_dump(),
201 |         )
202 | 
203 |         raw_response.raise_for_status()
204 | 
205 |         response = CreateGraphResponse.model_validate(raw_response.json())
206 | 
207 |         return response.message
208 | 
209 |     def create_graph_from_csv(self, namespace: str, schema_file: str) -> str:
210 |         """Create a new graph using a CSV based on a user-defined schema.
211 | 
212 |         Parameters
213 |         ----------
214 |         namespace : str
215 |             The namespace of the graph to create.
216 |         schema_file : str
217 |             The schema file to use to build the graph.
218 |         """
219 |         if not schema_file:
220 |             raise ValueError("No schema provided")
221 | 
222 |         with open(schema_file, "r", encoding="utf-8-sig") as file:
223 |             schema_data = json.load(file)
224 |             for entity in schema_data["entities"]:
225 |                 for property in entity["property_columns"]:
226 |                     if property.lower() in ["name", "namespace"]:
227 |                         raise ValueError(
228 |                             f"The values 'name' and 'namespace'"
229 |                             f"are not allowed in property_columns."
230 |                             f"Found '{property}'."
231 |                         )
232 | 
233 |         schema_model = SchemaModel(**schema_data)
234 | 
235 |         request_body = CreateSchemaGraphRequest(graph_schema=schema_model)
236 | 
237 |         raw_response = self.client.post(
238 |             f"{self.prefix}/{namespace}/create_graph_from_csv",
239 |             json=request_body.model_dump(),
240 |         )
241 | 
242 |         raw_response.raise_for_status()
243 | 
244 |         response = CreateGraphResponse.model_validate(raw_response.json())
245 | 
246 |         return response.message
247 | 
248 |     def query_graph(
249 |         self,
250 |         namespace: str,
251 |         query: str,
252 |         include_triples: bool = False,
253 |         include_chunks: bool = False,
254 |     ) -> QueryGraphResponse:
255 |         """Query the graph.
256 | 
257 |         Parameters
258 |         ----------
259 |         namespace : str
260 |             The namespace of the graph.
261 | 
262 |         query : str
263 |             The query to run.
264 | 
265 |         Returns
266 |         -------
267 |         QueryGraphResponse
268 |             The namespace, answer, triples, and chunks and Cypher query.
269 | 
270 |         """
271 |         request_body = QueryGraphRequest(
272 |             query=query,
273 |             include_triples=include_triples,
274 |             include_chunks=include_chunks,
275 |         )
276 | 
277 |         raw_response = self.client.post(
278 |             f"{self.prefix}/{namespace}/query",
279 |             json=request_body.model_dump(),
280 |         )
281 | 
282 |         raw_response.raise_for_status()
283 | 
284 |         response = QueryGraphResponse.model_validate(raw_response.json())
285 | 
286 |         # retval = QueryGraphReturn(answer=response.answer)
287 | 
288 |         return response
289 | 
290 |     def query_graph_specific(
291 |         self,
292 |         namespace: str,
293 |         query: str,
294 |         entities: list[str] = [],
295 |         relations: list[str] = [],
296 |         include_triples: bool = False,
297 |         include_chunks: bool = False,
298 |     ) -> SpecificQueryGraphResponse:
299 |         """Query the graph with specific entities and relations.
300 | 
301 |         Parameters
302 |         ----------
303 |         namespace : str
304 |             The namespace of the graph.
305 | 
306 |         entities : list[str]
307 |             The entities to query.
308 | 
309 |         relations : list[str]
310 |             The relations to query.
311 | 
312 |         Returns
313 |         -------
314 |         SpecificQueryGraphResponse
315 |             The namespace, answer, triples, and chunks.
316 | 
317 |         """
318 |         request_body = SpecificQueryGraphRequest(
319 |             query=query,
320 |             entities=entities,
321 |             relations=relations,
322 |             include_triples=include_triples,
323 |             include_chunks=include_chunks,
324 |         )
325 | 
326 |         raw_response = self.client.post(
327 |             f"{self.prefix}/{namespace}/specific_query",
328 |             json=request_body.model_dump(),
329 |         )
330 | 
331 |         raw_response.raise_for_status()
332 | 
333 |         response = SpecificQueryGraphResponse.model_validate(
334 |             raw_response.json()
335 |         )
336 | 
337 |         return response
338 | 


--------------------------------------------------------------------------------
/src/whyhow/client.py:
--------------------------------------------------------------------------------
  1 | """Implementation of the client logic."""
  2 | 
  3 | import os
  4 | from typing import Any, Generator, Optional
  5 | 
  6 | from httpx import AsyncClient, Auth, Client, Request, Response
  7 | 
  8 | from whyhow.apis.graph import GraphAPI
  9 | 
 10 | 
 11 | class APIKeyAuth(Auth):
 12 |     """Authorization header with API key."""
 13 | 
 14 |     def __init__(
 15 |         self,
 16 |         api_key: str,
 17 |         pinecone_api_key: str,
 18 |         neo4j_url: str,
 19 |         neo4j_user: str,
 20 |         neo4j_password: str,
 21 |         model_type: str,
 22 |         openai_api_key: Optional[str] = None,
 23 |         azure_openai_api_key: Optional[str] = None,
 24 |         azure_openai_version: Optional[str] = None,
 25 |         azure_openai_endpoint: Optional[str] = None,
 26 |         azure_openai_model_name: Optional[str] = None,
 27 |         azure_openai_embedding_name: Optional[str] = None,
 28 |         use_azure: Optional[bool] = False,
 29 |     ) -> None:
 30 |         """Initialize the auth object."""
 31 |         if openai_api_key and azure_openai_api_key:
 32 |             raise ValueError(
 33 |                 "Only one of openai_api_key or "
 34 |                 "azure_openai_api_key should be set."
 35 |             )
 36 | 
 37 |         self.api_key = api_key
 38 |         self.pinecone_api_key = pinecone_api_key
 39 |         self.neo4j_url = neo4j_url
 40 |         self.neo4j_user = neo4j_user
 41 |         self.neo4j_password = neo4j_password
 42 |         self.model_type = model_type
 43 |         self.openai_api_key = openai_api_key
 44 |         self.azure_openai_api_key = azure_openai_api_key
 45 |         self.azure_openai_version = azure_openai_version
 46 |         self.azure_openai_endpoint = azure_openai_endpoint
 47 |         self.azure_openai_model_name = azure_openai_model_name
 48 |         self.azure_openai_embedding_name = azure_openai_embedding_name
 49 |         self.use_azure = use_azure
 50 | 
 51 |     def auth_flow(
 52 |         self, request: Request
 53 |     ) -> Generator[Request, Response, None]:
 54 |         """Add the API key to the request."""
 55 |         request.headers["x-api-key"] = self.api_key
 56 |         request.headers["x-pinecone-key"] = self.pinecone_api_key
 57 |         if self.openai_api_key is not None:
 58 |             request.headers["x-openai-key"] = self.openai_api_key
 59 |         elif self.azure_openai_api_key is not None:
 60 |             request.headers["x-azure-openai-key"] = self.azure_openai_api_key
 61 |             if self.azure_openai_version is not None:
 62 |                 request.headers["x-azure-openai-version"] = (
 63 |                     self.azure_openai_version
 64 |                 )
 65 |             if self.azure_openai_endpoint is not None:
 66 |                 request.headers["x-azure-openai-endpoint"] = (
 67 |                     self.azure_openai_endpoint
 68 |                 )
 69 |             if self.azure_openai_model_name is not None:
 70 |                 request.headers["x-azure-openai-model-name"] = (
 71 |                     self.azure_openai_model_name
 72 |                 )
 73 |             if self.azure_openai_embedding_name is not None:
 74 |                 request.headers["x-azure-openai-embedding-name"] = (
 75 |                     self.azure_openai_embedding_name
 76 |                 )
 77 | 
 78 |         request.headers["x-neo4j-user"] = self.neo4j_user
 79 |         request.headers["x-neo4j-password"] = self.neo4j_password
 80 |         request.headers["x-neo4j-url"] = self.neo4j_url
 81 |         request.headers["x-model-type"] = self.model_type
 82 | 
 83 |         if self.use_azure:
 84 |             request.headers["x-use-azure"] = "True"
 85 |         elif not self.use_azure:
 86 |             request.headers["x-use-azure"] = "False"
 87 | 
 88 |         yield request
 89 | 
 90 | 
 91 | class WhyHow:
 92 |     """Synchronous client for the WhyHow API.
 93 | 
 94 |     Parameters
 95 |     ----------
 96 |     api_key : str, optional
 97 |         The API key to use for authentication. If not provided, the
 98 |         WHYHOW_API_KEY environment variable will be used.
 99 | 
100 |     base_url : str, optional
101 |         The base URL for the API.
102 | 
103 |     httpx_kwargs : dict, optional
104 |         Additional keyword arguments to pass to the httpx client.
105 | 
106 |     Attributes
107 |     ----------
108 |     httpx_client : httpx.Client
109 |         A synchronous httpx client.
110 |     """
111 | 
112 |     def __init__(
113 |         self,
114 |         api_key: str | None = None,
115 |         pinecone_api_key: str | None = None,
116 |         neo4j_url: str | None = None,
117 |         neo4j_user: str | None = None,
118 |         neo4j_password: str | None = None,
119 |         model_type: Optional[str] | None = None,
120 |         openai_api_key: str | None = None,
121 |         azure_openai_api_key: str | None = None,
122 |         azure_openai_version: str | None = None,
123 |         azure_openai_endpoint: str | None = None,
124 |         azure_openai_model_name: str | None = None,
125 |         azure_openai_embedding_name: str | None = None,
126 |         base_url:
127 |             str = "https://43nq5c1b4c.execute-api.us-east-2.amazonaws.com",
128 |         use_azure: Optional[bool] = False,
129 |         httpx_kwargs: dict[str, Any] | None = None,
130 |     ) -> None:
131 |         """Initialize the client."""
132 |         if httpx_kwargs is None:
133 |             httpx_kwargs = {}
134 | 
135 |         if api_key is None:
136 |             api_key = os.environ.get("WHYHOW_API_KEY")
137 | 
138 |             if api_key is None:
139 |                 raise ValueError("WHYHOW_API_KEY must be set.")
140 | 
141 |         if pinecone_api_key is None:
142 |             pinecone_api_key = os.environ.get("PINECONE_API_KEY")
143 | 
144 |             if pinecone_api_key is None:
145 |                 raise ValueError("PINECONE_API_KEY must be set.")
146 | 
147 |         if model_type is None:
148 |             model_type = os.environ.get("MODEL_TYPE")
149 | 
150 |             if model_type is None:
151 |                 model_type = "general"
152 | 
153 |             elif model_type not in ["general", "health"]:
154 |                 print("Invalid model type. Using general model.")
155 |                 model_type = "general"
156 | 
157 |         if openai_api_key is None:
158 |             openai_api_key = os.environ.get("OPENAI_API_KEY")
159 | 
160 |         if azure_openai_api_key is None:
161 |             azure_openai_api_key = os.environ.get("AZURE_OPENAI_API_KEY")
162 | 
163 |         if openai_api_key is None and azure_openai_api_key is None:
164 |             raise ValueError(
165 |                 "At least one of OPENAI_API_KEY"
166 |                 "or AZURE_OPENAI_API_KEY must be set."
167 |             )
168 | 
169 |         if azure_openai_version is None:
170 |             azure_openai_version = os.environ.get("AZURE_OPENAI_API_VERSION")
171 | 
172 |             if (
173 |                 azure_openai_api_key is not None
174 |                 and azure_openai_version is None
175 |             ):
176 |                 raise ValueError("AZURE_OPENAI_API_VERSION must be set.")
177 | 
178 |         if azure_openai_endpoint is None:
179 |             azure_openai_endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT")
180 | 
181 |             if (
182 |                 azure_openai_api_key is not None
183 |                 and azure_openai_endpoint is None
184 |             ):
185 |                 raise ValueError("AZURE_OPENAI_ENDPOINT must be set.")
186 | 
187 |         if azure_openai_model_name is None:
188 |             azure_openai_model_name = os.environ.get("AZURE_OPENAI_MODEL_NAME")
189 | 
190 |             if (
191 |                 azure_openai_api_key is not None
192 |                 and azure_openai_model_name is None
193 |             ):
194 |                 raise ValueError("AZURE_OPENAI_MODEL_NAME must be set.")
195 | 
196 |         if azure_openai_embedding_name is None:
197 |             azure_openai_embedding_name = os.environ.get(
198 |                 "AZURE_OPENAI_EMBEDDING_NAME"
199 |             )
200 | 
201 |             if (
202 |                 azure_openai_api_key is not None
203 |                 and azure_openai_embedding_name is None
204 |             ):
205 |                 raise ValueError("AZURE_OPENAI_EMBEDDING_NAME must be set.")
206 | 
207 |         if neo4j_user is None:
208 |             neo4j_user = os.environ.get("NEO4J_USER")
209 | 
210 |             if neo4j_user is None:
211 |                 raise ValueError("NEO4J_USER must be set.")
212 | 
213 |         if neo4j_password is None:
214 |             neo4j_password = os.environ.get("NEO4J_PASSWORD")
215 | 
216 |             if neo4j_password is None:
217 |                 raise ValueError("NEO4J_PASSWORD must be set.")
218 | 
219 |         if neo4j_url is None:
220 |             neo4j_url = os.environ.get("NEO4J_URL")
221 | 
222 |             if neo4j_url is None:
223 |                 raise ValueError("NEO4J_URL must be set.")
224 | 
225 |         auth = APIKeyAuth(
226 |             api_key,
227 |             pinecone_api_key,
228 |             neo4j_url,
229 |             neo4j_user,
230 |             neo4j_password,
231 |             model_type,
232 |             openai_api_key,
233 |             azure_openai_api_key,
234 |             azure_openai_version,
235 |             azure_openai_endpoint,
236 |             azure_openai_model_name,
237 |             azure_openai_embedding_name,
238 |             use_azure,
239 |         )
240 | 
241 |         if "base_url" in httpx_kwargs:
242 |             raise ValueError("base_url cannot be set in httpx_kwargs.")
243 | 
244 |         httpx_kwargs["timeout"] = 60.0  # Set timeout to 30 seconds
245 | 
246 |         self.httpx_client = Client(
247 |             base_url=base_url,
248 |             auth=auth,
249 |             **httpx_kwargs,
250 |         )
251 | 
252 |         self.graph = GraphAPI(client=self.httpx_client, prefix="/graphs")
253 | 
254 | 
255 | class AsyncWhyHow:
256 |     """Asynchronous client for the WhyHow API.
257 | 
258 |     Parameters
259 |     ----------
260 |     api_key : str, optional
261 |         The API key to use for authentication. If not provided, the
262 |         WHYHOW_API_KEY environment variable will be used.
263 | 
264 |     base_url : str, optional
265 |         The base URL for the API.
266 | 
267 |     httpx_kwargs : dict, optional
268 |         Additional keyword arguments to pass to the httpx async client.
269 | 
270 |     Attributes
271 |     ----------
272 |     httpx_client : httpx.AsyncClient
273 |         An async httpx client.
274 |     """
275 | 
276 |     def __init__(
277 |         self,
278 |         api_key: str | None = None,
279 |         pinecone_api_key: str | None = None,
280 |         openai_api_key: str | None = None,
281 |         neo4j_user: str | None = None,
282 |         neo4j_password: str | None = None,
283 |         neo4j_url: str | None = None,
284 |         model_type: Optional[str] | None = None,
285 |         base_url:
286 |             str = "https://43nq5c1b4c.execute-api.us-east-2.amazonaws.com",
287 |         httpx_kwargs: dict[str, Any] | None = None,
288 |     ) -> None:
289 |         """Initialize the client."""
290 |         if httpx_kwargs is None:
291 |             httpx_kwargs = {}
292 | 
293 |         if api_key is None:
294 |             api_key = os.environ.get("WHYHOW_API_KEY")
295 | 
296 |             if api_key is None:
297 |                 raise ValueError("WHYHOW_API_KEY must be set.")
298 | 
299 |         if pinecone_api_key is None:
300 |             pinecone_api_key = os.environ.get("PINECONE_API_KEY")
301 | 
302 |             if pinecone_api_key is None:
303 |                 raise ValueError("PINECONE_API_KEY must be set.")
304 | 
305 |         if model_type is None:
306 |             model_type = os.environ.get("MODEL_TYPE")
307 | 
308 |             if model_type is None:
309 |                 model_type = "general"
310 | 
311 |             elif model_type not in ["general", "health"]:
312 |                 print("Invalid model type. Using general model.")
313 |                 model_type = "general"
314 | 
315 |         if openai_api_key is None:
316 |             openai_api_key = os.environ.get("OPENAI_API_KEY")
317 | 
318 |             if openai_api_key is None:
319 |                 raise ValueError("OPENAI_API_KEY must be set.")
320 | 
321 |         if neo4j_user is None:
322 |             neo4j_user = os.environ.get("NEO4J_USER")
323 | 
324 |             if neo4j_user is None:
325 |                 raise ValueError("NEO4J_USER must be set.")
326 | 
327 |         if neo4j_password is None:
328 |             neo4j_password = os.environ.get("NEO4J_PASSWORD")
329 | 
330 |             if neo4j_password is None:
331 |                 raise ValueError("NEO4J_PASSWORD must be set.")
332 | 
333 |         if neo4j_url is None:
334 |             neo4j_url = os.environ.get("NEO4J_URL")
335 | 
336 |             if neo4j_url is None:
337 |                 raise ValueError("NEO4J_URL must be set.")
338 | 
339 |         auth = APIKeyAuth(
340 |             api_key,
341 |             pinecone_api_key,
342 |             openai_api_key,
343 |             neo4j_user,
344 |             neo4j_password,
345 |             neo4j_url,
346 |             model_type,
347 |         )
348 | 
349 |         if "base_url" in httpx_kwargs:
350 |             raise ValueError("base_url cannot be set in httpx_kwargs.")
351 | 
352 |         self.httpx_client = AsyncClient(
353 |             base_url=base_url,
354 |             auth=auth,
355 |             **httpx_kwargs,
356 |         )
357 | 


--------------------------------------------------------------------------------
/src/whyhow/exceptions.py:
--------------------------------------------------------------------------------
1 | """Collection of all custom exceptions for the package."""
2 | 
3 | 
4 | class ResourceNotAvailableError(Exception):
5 |     """Raised when a resource is not available."""
6 | 
7 |     pass
8 | 


--------------------------------------------------------------------------------
/src/whyhow/py.typed:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/whyhow-ai/whyhow/63a3c6c18e660ac5c7268159956f62db3f15a0ae/src/whyhow/py.typed


--------------------------------------------------------------------------------
/src/whyhow/schemas/__init__.py:
--------------------------------------------------------------------------------
1 | """Collection of various schemas and types."""
2 | 
3 | from whyhow.schemas.common import Entity, Graph, Node, Relationship, Triple
4 | 
5 | __all__ = ["Entity", "Graph", "Node", "Relationship", "Triple"]
6 | 


--------------------------------------------------------------------------------
/src/whyhow/schemas/base.py:
--------------------------------------------------------------------------------
 1 | """Base classes for request, response, and return schemas."""
 2 | 
 3 | from abc import ABC
 4 | 
 5 | from pydantic import BaseModel, ConfigDict
 6 | 
 7 | 
 8 | class BaseRequest(BaseModel, ABC):
 9 |     """Base class for all request schemas."""
10 | 
11 |     model_config = ConfigDict(extra="forbid")
12 | 
13 | 
14 | class BaseResponse(BaseModel, ABC):
15 |     """Base class for all response schemas.
16 | 
17 |     Since the API can change, we want to ignore any extra fields that are not
18 |     defined in the schema.
19 |     """
20 | 
21 |     model_config = ConfigDict(extra="ignore")
22 | 
23 | 
24 | class BaseReturn(BaseModel, ABC):
25 |     """Base class for return schemas."""
26 | 
27 |     model_config = ConfigDict(extra="forbid")
28 | 


--------------------------------------------------------------------------------
/src/whyhow/schemas/common.py:
--------------------------------------------------------------------------------
  1 | """Shared schemas."""
  2 | 
  3 | from typing import Any, List, Optional
  4 | 
  5 | from pydantic import BaseModel, Field, model_validator
  6 | 
  7 | 
  8 | class Node(BaseModel):
  9 |     """Schema for a single node.
 10 | 
 11 |     Mirroring Neo4j"s node structure.
 12 |     """
 13 | 
 14 |     labels: list[str]
 15 |     properties: dict[str, Any] = Field(default_factory=dict)
 16 | 
 17 | 
 18 | class Relationship(BaseModel):
 19 |     """Schema for a single relationship.
 20 | 
 21 |     Mirroring Neo4j"s relationship structure.
 22 |     """
 23 | 
 24 |     type: str
 25 |     start_node: Node
 26 |     end_node: Node
 27 |     properties: dict[str, Any] = Field(default_factory=dict)
 28 | 
 29 | 
 30 | class Graph(BaseModel):
 31 |     """Schema for a graph.
 32 | 
 33 |     Mirroring Neo4j"s graph structure.
 34 |     """
 35 | 
 36 |     relationships: list[Relationship]
 37 |     nodes: list[Node]
 38 | 
 39 |     @model_validator(mode="before")
 40 |     @classmethod
 41 |     def imply_nodes(cls, data: dict[str, Any]) -> dict[str, Any]:
 42 |         """Implies nodes from relationships if not provided."""
 43 |         if "nodes" not in data or data["nodes"] is None:
 44 |             nodes = []
 45 |             for rel in data.get("relationships", []):
 46 |                 start = rel.start_node
 47 |                 end = rel.end_node
 48 |                 if start not in nodes:
 49 |                     nodes.append(start)
 50 |                 if end not in nodes:
 51 |                     nodes.append(end)
 52 | 
 53 |             data["nodes"] = nodes
 54 | 
 55 |         return data
 56 | 
 57 | 
 58 | class Entity(BaseModel):
 59 |     """Schema for a single entity.
 60 | 
 61 |     Note that this is not identical to Node because
 62 |     it only allows for 1 label and the text is a required field.
 63 |     """
 64 | 
 65 |     text: str
 66 |     label: str
 67 |     properties: dict[str, Any] = Field(default_factory=dict)
 68 | 
 69 |     def to_node(self) -> Node:
 70 |         """Convert the entity to a node."""
 71 |         properties = self.properties.copy()
 72 |         properties["name"] = self.text
 73 | 
 74 |         return Node(labels=[self.label], properties=properties)
 75 | 
 76 |     @classmethod
 77 |     def from_node(cls, node: Node) -> "Entity":
 78 |         """Convert a node to an entity."""
 79 |         if "name" not in node.properties:
 80 |             raise ValueError("Node must have a name property.")
 81 | 
 82 |         properties = node.properties.copy()
 83 |         name = properties.pop("name")
 84 | 
 85 |         return cls(
 86 |             text=name,
 87 |             label=node.labels[0],  # take the first label
 88 |             properties=properties,
 89 |         )
 90 | 
 91 | 
 92 | class Triple(BaseModel):
 93 |     """Schema for a single triple.
 94 | 
 95 |     Note that this is not identical to RelationshipNeo4j because
 96 |     it is not using ids
 97 | 
 98 |     """
 99 | 
100 |     head: str
101 |     head_type: str
102 |     relationship: str
103 |     tail: str
104 |     tail_type: str
105 |     properties: dict[str, Any] = Field(default_factory=dict)
106 | 
107 |     def to_relationship(self) -> Relationship:
108 |         """Convert the triple to a relationship."""
109 |         start = Node(labels=[self.head_type], properties={"name": self.head})
110 |         end = Node(labels=[self.tail_type], properties={"name": self.tail})
111 | 
112 |         return Relationship(
113 |             type=self.relationship,
114 |             start_node=start,
115 |             end_node=end,
116 |             properties=self.properties,
117 |         )
118 | 
119 |     @classmethod
120 |     def from_relationship(cls, relationship: Relationship) -> "Triple":
121 |         """Convert a relationship to a triple."""
122 |         start = relationship.start_node
123 |         end = relationship.end_node
124 | 
125 |         if "name" not in start.properties:
126 |             raise ValueError("Start node must have a name property.")
127 |         if "name" not in end.properties:
128 |             raise ValueError("End node must have a name property.")
129 | 
130 |         return cls(
131 |             head=start.properties["name"],
132 |             head_type=start.labels[0],  # take the first label
133 |             relationship=relationship.type,
134 |             tail=end.properties["name"],
135 |             tail_type=end.labels[0],  # take the first label
136 |             properties=relationship.properties,
137 |         )
138 | 
139 | 
140 | # GRAPH SCHEMA
141 | class SchemaEntity(BaseModel):
142 |     """Schema Entity model."""
143 | 
144 |     name: str
145 |     property_columns: Optional[List[str]] = None
146 |     set_type_as: Optional[str] = None
147 |     description: str
148 | 
149 | 
150 | class SchemaRelation(BaseModel):
151 |     """Schema Relation model."""
152 | 
153 |     name: str
154 |     description: str
155 | 
156 | 
157 | class TriplePattern(BaseModel):
158 |     """Schema Triple Pattern model."""
159 | 
160 |     head: str
161 |     relation: str
162 |     tail: str
163 |     description: str
164 | 
165 | 
166 | class Schema(BaseModel):
167 |     """Schema model."""
168 | 
169 |     entities: List[SchemaEntity] = Field(default_factory=list)
170 |     relations: List[SchemaRelation] = Field(default_factory=list)
171 |     patterns: List[TriplePattern] = Field(default_factory=list)
172 | 
173 |     def get_entity(self, name: str) -> Optional[SchemaEntity]:
174 |         """Return an entity by name if it exists in the schema."""
175 |         for entity in self.entities:
176 |             if entity.name == name:
177 |                 return entity
178 |         return None  # Return None if no entity with that name is found
179 | 
180 |     def get_relation(self, name: str) -> Optional[SchemaRelation]:
181 |         """Return a relation by name if it exists in the schema."""
182 |         for relation in self.relations:
183 |             if relation.name == name:
184 |                 return relation
185 |         return None  # Return None if no relation with that name is found
186 | 


--------------------------------------------------------------------------------
/src/whyhow/schemas/graph.py:
--------------------------------------------------------------------------------
  1 | """Collection of schemas for the API."""
  2 | 
  3 | from typing import Literal
  4 | 
  5 | from whyhow.schemas.base import BaseRequest, BaseResponse, BaseReturn
  6 | from whyhow.schemas.common import Graph, Schema
  7 | 
  8 | # Custom types: the values allowed for a `status: Status` field.
  9 | Status = Literal["success", "pending", "failure"]
 10 | 
 11 | 
 12 | class AddDocumentsResponse(BaseResponse):
 13 |     """Schema for the response body of the add documents endpoint."""
 14 | 
 15 |     namespace: str
 16 |     message: str
 17 | 
 18 | 
 19 | class CreateQuestionGraphRequest(BaseRequest):
 20 |     """Schema for the request body of the create graph endpoint."""
 21 | 
 22 |     questions: list[str]
 23 | 
 24 | 
 25 | class CreateSchemaGraphRequest(BaseRequest):
 26 |     """Schema for the request body of the create graph with schema endpoint."""
 27 | 
 28 |     graph_schema: Schema
 29 | 
 30 | 
 31 | # Request and response schemas
 32 | class CreateGraphResponse(BaseResponse):
 33 |     """Schema for the response body of the create graph endpoint."""
 34 | 
 35 |     namespace: str
 36 |     message: str
 37 | 
 38 | 
 39 | class GetGraphResponse(BaseResponse):
 40 |     """Schema for the response body of the get graph endpoint."""
 41 | 
 42 |     namespace: str
 43 |     status: Status
 44 |     documents: list[str]
 45 |     graph: Graph
 46 | 
 47 | 
 48 | class QueryGraphRequest(BaseRequest):
 49 |     """Schema for the request body of the query graph endpoint."""
 50 | 
 51 |     query: str
 52 |     include_triples: bool = False
 53 |     include_chunks: bool = False
 54 | 
 55 | 
 56 | class QueryGraphTripleResponse(BaseResponse):
 57 |     """Schema for the triples within the query graph response."""
 58 | 
 59 |     head: str
 60 |     relation: str
 61 |     tail: str
 62 | 
 63 | 
 64 | class QueryGraphChunkResponse(BaseResponse):
 65 |     """Schema for the triples within the query graph response."""
 66 | 
 67 |     head: str
 68 |     relation: str
 69 |     tail: str
 70 |     chunk_ids: list[str]
 71 |     chunk_texts: list[str]
 72 | 
 73 | 
 74 | class QueryGraphResponse(BaseResponse):
 75 |     """Schema for the response body of the query graph endpoint."""
 76 | 
 77 |     namespace: str
 78 |     answer: str
 79 |     triples: list[QueryGraphTripleResponse] = []
 80 |     chunks: list[QueryGraphChunkResponse] = []
 81 | 
 82 | 
 83 | class QueryGraphReturn(BaseReturn):
 84 |     """Schema for the return value of the query graph endpoint."""
 85 | 
 86 |     namespace: str
 87 |     answer: str
 88 |     triples: list[QueryGraphTripleResponse] = []
 89 |     chunks: list[QueryGraphChunkResponse] = []
 90 | 
 91 | 
 92 | class SpecificQueryGraphRequest(BaseRequest):
 93 |     """Schema for the request body of the specific query graph endpoint."""
 94 | 
 95 |     query: str
 96 |     entities: list[str] = []
 97 |     relations: list[str] = []
 98 |     include_triples: bool = False
 99 |     include_chunks: bool = False
100 | 
101 | 
102 | class SpecificQueryGraphResponse(BaseResponse):
103 |     """Schema for the response body of the query graph endpoint."""
104 | 
105 |     namespace: str
106 |     answer: str
107 |     triples: list[dict[str, str]] = []
108 | 


--------------------------------------------------------------------------------
/tests/apis/test_graph.py:
--------------------------------------------------------------------------------
  1 | """Tests focused on the graph API."""
  2 | 
  3 | import os
  4 | 
  5 | import pytest
  6 | 
  7 | from whyhow.client import WhyHow
  8 | from whyhow.schemas.common import Graph, Node, Relationship
  9 | from whyhow.schemas.graph import QueryGraphRequest, QueryGraphResponse
 10 | 
 11 | # Set fake environment variables
 12 | os.environ["WHYHOW_API_KEY"] = "fake_api_key"
 13 | os.environ["OPENAI_API_KEY"] = "fake_openai_key"
 14 | os.environ["PINECONE_API_KEY"] = "fake_pinecone_key"
 15 | os.environ["NEO4J_USER"] = "fake_neo4j_user"
 16 | os.environ["NEO4J_PASSWORD"] = "fake_neo4j_password"
 17 | os.environ["NEO4J_URL"] = "fake_neo4j_url"
 18 | 
 19 | EXAMPLE_GRAPH = Graph(
 20 |     relationships=[
 21 |         Relationship(
 22 |             type="knows",
 23 |             start_node=Node(labels=["Person"], properties={"name": "Alice"}),
 24 |             end_node=Node(labels=["Person"], properties={"name": "Bob"}),
 25 |             properties={"since": "2022-01-01"},
 26 |         )
 27 |     ]
 28 | )
 29 | 
 30 | 
 31 | class TestGraphAPIQuery:
 32 |     """Tests for the `query` method."""
 33 | 
 34 |     def test_query_graph(self, httpx_mock):
 35 |         """Test that the method sends the correct request."""
 36 |         client = WhyHow(
 37 |             openai_api_key="fake_openai_key",
 38 |             azure_openai_api_key=None,
 39 |             azure_openai_version=None,
 40 |         )
 41 |         query = "What friends does Alice have?"
 42 | 
 43 |         fake_response_body = QueryGraphResponse(
 44 |             namespace="something",
 45 |             answer="Alice knows Bob",
 46 |             triples=[],
 47 |             chunks=[],
 48 |         )
 49 |         httpx_mock.add_response(
 50 |             method="POST",
 51 |             json=fake_response_body.model_dump(),
 52 |         )
 53 | 
 54 |         result = client.graph.query_graph(
 55 |             namespace="something",
 56 |             query=query,
 57 |         )
 58 | 
 59 |         assert result == QueryGraphResponse(
 60 |             namespace="something",
 61 |             answer="Alice knows Bob",
 62 |             triples=[],
 63 |             chunks=[],
 64 |         )
 65 | 
 66 |         actual_request = httpx_mock.get_requests()[0]
 67 |         expected_request_body = QueryGraphRequest(query=query)
 68 |         actual_request_body = QueryGraphRequest.model_validate_json(
 69 |             actual_request.read().decode()
 70 |         )
 71 | 
 72 |         assert actual_request.url.path == "/graphs/something/query"
 73 |         assert actual_request_body == expected_request_body
 74 | 
 75 | 
 76 | class TestGraphAPIAddDocuments:
 77 |     """Tests for the `add_documents` method."""
 78 | 
 79 |     def test_errors(self, httpx_mock, tmp_path):
 80 |         """Test that the method raises errors when necessary."""
 81 |         client = WhyHow(
 82 |             openai_api_key="fake_openai_key",
 83 |             azure_openai_api_key=None,
 84 |             azure_openai_version=None,
 85 |         )
 86 | 
 87 |         with pytest.raises(ValueError, match="No documents provided"):
 88 |             client.graph.add_documents("something", documents=[])
 89 | 
 90 |         tmp_pdf_1 = tmp_path / "example.pdf"
 91 |         tmp_pdf_1.touch()
 92 |         tmp_pdf_2 = tmp_path / "example2.wav"
 93 | 
 94 |         with pytest.raises(ValueError, match="Not all documents exist"):
 95 |             client.graph.add_documents(
 96 |                 "something",
 97 |                 documents=[tmp_pdf_1, tmp_pdf_2],
 98 |             )
 99 | 
100 |         tmp_pdf_2.touch()
101 | 
102 |         with pytest.raises(
103 |             ValueError, match="Only PDFs and CSVs are supported"
104 |         ):
105 |             client.graph.add_documents(
106 |                 "something",
107 |                 documents=[tmp_pdf_1, tmp_pdf_2],
108 |             )
109 | 


--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
 1 | """Configuration for the tests."""
 2 | 
 3 | import pathlib
 4 | 
 5 | import pytest
 6 | 
 7 | 
 8 | @pytest.fixture
 9 | def test_path():
10 |     """Return the path to the tests directory."""
11 |     return pathlib.Path(__file__).parent
12 | 
13 | 
14 | @pytest.fixture(autouse=True)
15 | def delete_env_vars(monkeypatch):
16 |     """Delete environment variables.
17 | 
18 |     This fixture is used to delete the environment variables that are used
19 | 
20 |     """
21 |     monkeypatch.setenv("WHYHOW_API_KEY", "FAKE")
22 | 


--------------------------------------------------------------------------------
/tests/schemas/test_common.py:
--------------------------------------------------------------------------------
  1 | """Tests for whyhow.schemas.common."""
  2 | 
  3 | import pytest
  4 | 
  5 | from whyhow.schemas.common import Entity, Graph, Node, Relationship, Triple
  6 | 
  7 | 
  8 | class TestGraph:
  9 |     """Tests for the Graph class."""
 10 | 
 11 |     def test_no_nodes(self):
 12 |         """Test creating a graph with no nodes."""
 13 |         graph = Graph(relationships=[])
 14 |         assert graph.nodes == []
 15 |         assert graph.relationships == []
 16 | 
 17 |     def test_1_node(self):
 18 |         """Test creating a graph with one node."""
 19 |         node = Node(labels=["Person"], properties={"name": "Alice"})
 20 |         graph = Graph(nodes=[node], relationships=[])
 21 |         assert graph.nodes == [node]
 22 |         assert graph.relationships == []
 23 | 
 24 |     def test_3_nodes_1_rel(self):
 25 |         """Test creating a graph with three nodes and one relationship."""
 26 |         node_1 = Node(labels=["Person"], properties={"name": "Alice"})
 27 |         node_2 = Node(labels=["Person"], properties={"name": "Bob"})
 28 |         node_3 = Node(labels=["Person"], properties={"name": "Charlie"})
 29 | 
 30 |         rel = Relationship(
 31 |             start_node=node_1,
 32 |             end_node=node_2,
 33 |             type="KNOWS",
 34 |             properties={"since": 1999},
 35 |         )
 36 | 
 37 |         graph = Graph(nodes=[node_1, node_2, node_3], relationships=[rel])
 38 |         assert graph.nodes == [node_1, node_2, node_3]
 39 |         assert graph.relationships == [rel]
 40 | 
 41 |         graph_implied = Graph(relationships=[rel])
 42 | 
 43 |         assert graph_implied.nodes == [node_1, node_2]
 44 |         assert graph_implied.relationships == [rel]
 45 | 
 46 | 
 47 | class TestEntity:
 48 |     """Tests for the Entity class."""
 49 | 
 50 |     def test_overall(self):
 51 |         """Test creating an entity and converting it to a node."""
 52 |         entity = Entity(
 53 |             text="Alice", label="Person", properties={"foo": "bar"}
 54 |         )
 55 |         node = entity.to_node()
 56 | 
 57 |         assert node.labels == ["Person"]
 58 |         assert node.properties == {"name": "Alice", "foo": "bar"}
 59 | 
 60 |         entity_reconstructed = Entity.from_node(node)
 61 | 
 62 |         assert entity.text == entity_reconstructed.text
 63 |         assert entity.label == entity_reconstructed.label
 64 |         assert entity.properties == entity_reconstructed.properties
 65 | 
 66 |         # test properties copied
 67 |         assert entity.properties is not entity_reconstructed.properties
 68 | 
 69 |     def test_missing_name(self):
 70 |         """Test creating an entity without a name property."""
 71 |         node = Node(labels=["Person"], properties={})
 72 | 
 73 |         with pytest.raises(ValueError, match="Node must have a name property"):
 74 |             Entity.from_node(node)
 75 | 
 76 | 
 77 | class TestTriple:
 78 |     """Tests for the Triple class."""
 79 | 
 80 |     # def test_overall(self):
 81 |     #     triple = Triple(
 82 |     #         head="Alice",
 83 |     #         head_type="Person",
 84 |     #         relationship="KNOWS",
 85 |     #         tail="Bob",
 86 |     #         tail_type="Person",
 87 |     #         properties={"since": 1999},
 88 |     #     )
 89 | 
 90 |     #     assert triple.head == "Alice"
 91 |     #     assert triple.head_type == "Person"
 92 |     #     assert triple.relationship == "KNOWS"
 93 |     #     assert triple.tail == "Bob"
 94 |     #     assert triple.tail_type == "Person"
 95 | 
 96 |     #     rel = triple.to_relationship()
 97 | 
 98 |     #     assert rel.start_node.labels == ["Person"]
 99 |     #     assert rel.start_node.properties == {"name": "Alice"}
100 |     #     assert rel.end_node.labels == ["Person"]
101 |     #     assert rel.end_node.properties == {"name": "Bob"}
102 |     #     assert rel.type == "KNOWS"
103 |     #     assert rel.properties == {"since": 1999}
104 | 
105 |     #     triple_reconstructed = Triple.from_relationship(rel)
106 | 
107 |     #     assert triple.head == triple_reconstructed.head
108 |     #     assert triple.head_type == triple_reconstructed.head_type
109 |     #     assert triple.relationship == triple_reconstructed.relationship
110 |     #     assert triple.tail == triple_reconstructed.tail
111 |     #     assert triple.tail_type == triple_reconstructed.tail_type
112 |     #     assert triple.properties == triple_reconstructed.properties
113 | 
114 |     #     # test properties copied
115 |     #     assert triple.properties is not triple_reconstructed.properties
116 | 
117 |     def test_missing_name(self):
118 |         """Test creating a triple with a node missing a name property."""
119 |         rel = Relationship(
120 |             start_node=Node(labels=["Person"], properties={}),
121 |             end_node=Node(labels=["Person"], properties={"name": "Bob"}),
122 |             type="KNOWS",
123 |             properties={},
124 |         )
125 | 
126 |         with pytest.raises(
127 |             ValueError, match="Start node must have a name property"
128 |         ):
129 |             Triple.from_relationship(rel)
130 | 
131 |         rel = Relationship(
132 |             start_node=Node(labels=["Person"], properties={"name": "Alice"}),
133 |             end_node=Node(labels=["Person"], properties={}),
134 |             type="KNOWS",
135 |             properties={},
136 |         )
137 | 
138 |         with pytest.raises(
139 |             ValueError, match="End node must have a name property"
140 |         ):
141 |             Triple.from_relationship(rel)
142 | 


--------------------------------------------------------------------------------
/tests/test_client.py:
--------------------------------------------------------------------------------
 1 | """Tests for the client module."""
 2 | 
 3 | from unittest.mock import Mock
 4 | 
 5 | import pytest
 6 | from httpx import Client
 7 | 
 8 | from whyhow.client import WhyHow
 9 | 
10 | 
11 | class TestWhyHow:
12 |     """Tests for the WhyHow class."""
13 | 
14 |     def test_constructor_missing_api_key(self, monkeypatch):
15 |         """Test that an error raised when the API key is missing."""
16 |         monkeypatch.delenv("WHYHOW_API_KEY", raising=False)
17 |         with pytest.raises(ValueError, match="WHYHOW_API_KEY must be set"):
18 |             WhyHow()
19 | 
20 |     def test_httpx_kwargs(self, monkeypatch):
21 |         """Test that httpx_kwargs passed to the httpx client."""
22 |         fake_httpx_client_inst = Mock(spec=Client)
23 |         fake_httpx_client_class = Mock(return_value=fake_httpx_client_inst)
24 | 
25 |         monkeypatch.setattr("whyhow.client.Client", fake_httpx_client_class)
26 |         httpx_kwargs = {"verify": False}
27 |         client = WhyHow(
28 |             api_key="key",
29 |             httpx_kwargs=httpx_kwargs,
30 |         )
31 | 
32 |         assert fake_httpx_client_class.call_count == 1
33 |         args, kwargs = fake_httpx_client_class.call_args
34 | 
35 |         assert not args
36 |         assert (
37 |             kwargs["base_url"]
38 |             == "https://43nq5c1b4c.execute-api.us-east-2.amazonaws.com"
39 |         )
40 |         assert not kwargs["verify"]
41 | 
42 |         assert client.httpx_client is fake_httpx_client_class.return_value
43 | 
44 |     def test_base_url_twice(self):
45 |         """Test that an error raised when base_url is set twice."""
46 |         with pytest.raises(
47 |             ValueError, match="base_url cannot be set in httpx_kwargs."
48 |         ):
49 |             WhyHow(
50 |                 api_key="key",
51 |                 httpx_kwargs={"base_url": "https://example.com"},
52 |             )
53 | 


--------------------------------------------------------------------------------
/tests/test_dummy.py:
--------------------------------------------------------------------------------
1 | """Dummy test."""
2 | 
3 | 
4 | def test():
5 |     """Placeholder test that always passes."""
6 |     assert True
7 | 


--------------------------------------------------------------------------------