├── .DS_Store ├── .gitignore ├── LICENSE ├── README.md ├── setup.py ├── tests └── test_tychos.py ├── tychos.egg-info ├── dependency_links.txt ├── entry_points.txt ├── requires.txt └── top_level.txt └── tychos ├── __init__.py ├── __pycache__ ├── __init__.cpython-311.pyc ├── __init__.cpython-39.pyc ├── cli.cpython-39.pyc ├── config.cpython-39.pyc ├── vector.cpython-311.pyc └── vector_data_store.cpython-311.pyc ├── cli.py ├── config.py ├── helpers ├── __init__.py └── validation_checks.py ├── vector.py └── vector_data_store.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tychos-ai/tychos-python/78c9fc2c45ee93f8fbfb6775fad83e64a87260fc/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | dist/ 3 | build/ 4 | tychos/helpers/__pycache__/* 5 | tychos/__pycache__/* 6 | *.egg 7 | *.egg-info -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2023 Weatherman Labs, Inc 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Tychos Python Library 2 | The Tychos Python library provides convenient access to the Tychos API from 3 | applications written in the Python language. The Tychos API allows you to query live, hosted vector datasets in your LLM application without needing to manage your own vector database / embedding pipelines. 4 | 5 | To see the Tychos API in action, you can test out our [PubMed Demo App](https://tychos.ai/demo). 6 | 7 | ## Installation 8 | 9 | You don't need this source code unless you want to modify the package. If you just 10 | want to use the package, just run: 11 | 12 | ```sh 13 | pip install tychos 14 | ``` 15 | 16 | Install from source with: 17 | 18 | ```sh 19 | python setup.py install 20 | ``` 21 | ### Requirements 22 | 23 | - Python 2.7+ or Python 3.6+ 24 | - Requests 25 | 26 | ## Usage 27 | 28 | The library needs to be configured with your account's secret key which is 29 | available via the [Tychos Website][api-keys]. Either set the TYCHOS_API_KEY environment variable before using the library: 30 | 31 | ```python 32 | import tychos 33 | export TYCHOS_API_KEY='sk_a9adj...' 34 | ``` 35 | 36 | Or initialize the VectorDataStore using an API key: 37 | ```python 38 | import tychos 39 | data_store = tychos.VectorDataStore(api_key="sk_a9adj...") 40 | ``` 41 | 42 | ### Query live vector datasets 43 | ```python 44 | # initialize data store 45 | data_store = tychos.VectorDataStore() 46 | 47 | # list available datasets 48 | datasets = data_store.list() 49 | 50 | # get name of the first dataset's id 51 | print(datasets['data'][0]['name']) 52 | 53 | # query a single dataset from the data store object 54 | query_results = data_store.query( 55 | name = "pub-med-abstracts", # dataset index can be a string or an array 56 | query_string = "What is the latest research on molecular peptides", # search string 57 | limit = 5, # number of results 58 | ) 59 | 60 | # query multiple datasets and return the global top results 61 | query_results = data_store.query( 62 | name = ["arxiv-abstracts", "pub-med-abstracts"], # dataset index can be a string or an array 63 | query_string = "What is the latest research on molecular peptides", # search string 64 | limit = 5, # number of results (across all datasets queried) 65 | ) 66 | 67 | # print the metadata associated with the first result 68 | print(query_results[0]['payload']) 69 | ``` 70 | 71 | ### Filter queries on metadata fields 72 | You can filter queries of individual datasets by passing a query_filter dict that specifies the field, operator and condition to apply. The following operators are currently available: 73 | 74 | | Operator | Checks if the field value is... | 75 | | :--- | :--- | 76 | | $eq | **equal to** the specified value| 77 | | $ne | **not equal to** the specified value| 78 | | $in | **within** the specified array| 79 | | $nin | **not within** the specified array| 80 | 81 | Example queries using filters: 82 | ```python 83 | # filter PubMed query on articles within a particular journal 84 | query_results = data_store.query( 85 | name = "pub-med-abstracts", 86 | query_string = "What is the latest research on molecular peptides", 87 | query_filter = {"Journal": {"$eq":"New England Journal of Medicine"}} 88 | limit = 5, 89 | ) 90 | 91 | # filter ArXiv query on papers written by LeCun, Hinton and Bengio 92 | query_results = data_store.query( 93 | name = "arxiv-abstracts", 94 | query_string = "What is the latest research on molecular peptides", 95 | query_filter = {"authors": {"$in":["LeCun", "Hinton", "Bengio"]}} 96 | limit = 5, 97 | ) 98 | 99 | ``` 100 | 101 | See the datasets table below for the metadata fields available on each. We are working on adding additional query operators and fields (e.g., date ranges). As we expand datasets, we also plan to make available a set of general filters (e.g., date, author, type) for queries across multiple datasets. 102 | 103 | ## Command-line interface 104 | This library additionally provides a tychos command-line utility to make it easy to interact with the API from your terminal. Run tychos-cli -h for usage. 105 | 106 | ```sh 107 | tychos-cli query --api-key --name pub-med-abstracts --query-string <"Your query string"> --limit 5 108 | 109 | ``` 110 | 111 | ## Datasets available 112 | We currently support a range of pre-print, research, and patent datasets and have plans to add additional sources in the coming weeks. If there's a particular dataset you'd like to incorporate into your LLM application, feel free to [reach out][twitter] or raise a GitHub issue. 113 | 114 | ### Vector datasets 115 | | Dataset | Name | Size | Syncs | Metadata Fields | 116 | | :--------------- | :--------------- | :--------------- | :--------------- | :--------------------- | 117 | | PubMed ([source][pub-med]) | pub-med-abstracts | 35.5M documents | Daily at 07:00 UTC | **All fields:** PMID, PMCID, Title, Abstract, Authors, Abstract_URL, PMC_URL, Journal, Publication Date
**Query filterable:** Authors, Journal | 118 | | US Patents ([source][patents]) | us-patents | 6.9M patents | Quarterly at 07:00 UTC (1st of Quarter) | **All fields:** patent_id, title, summary, claims, patent_url, inventors, classification, type, assignees, location, date_filed, date_granted, term
**Query filterable:** coming soon! | 119 | | ArXiv ([source][arxiv]) | arxiv-abstracts | 2.3M documents | Weekly at 07:00 UTC (Sunday) | **All fields:** id, doi, paper_title, abstract, authors, categories, abstract_url, full_text_url, journal, pub_date, update_date
**Query filterable:** authors, categories, journal | 120 | | BioRxiv ([source][biorxiv]) | biorxiv | 285.5K documents | Monthly at 07:00 UTC (Sunday) | **All fields:** doi, title, abstract, authors, category, jatsxml, author_corresponding, author_corresponding_institution, date, date_timestamp, license, published, type
**Query filterable:** authors, category, date_timestamp | 121 | | MedRxiv ([source][medrxiv]) | medrxiv | 58.2K documents | Monthly at 07:00 UTC (Sunday) | **All fields:** doi, title, abstract, authors, category, jatsxml, author_corresponding, author_corresponding_institution, date, date_timestamp, license, published, type
**Query filterable:** authors, category, date_timestamp | 122 | 123 | ## Feedback and support 124 | If you'd like to provide feedback, run into issues, or need support using embeddings, feel free to [reach out][twitter] or raise a GitHub issue. 125 | 126 | 127 | [api-keys]: https://tychos.ai/ 128 | [twitter]: https://twitter.com/etpuisfume 129 | [pub-med]: https://pubmed.ncbi.nlm.nih.gov/download/ 130 | [arxiv]: https://info.arxiv.org/help/bulk_data/index.html 131 | [patents]: https://patentsview.org/download/data-download-tables 132 | [biorxiv]: https://www.biorxiv.org/ 133 | [medrxiv]: https://www.medrxiv.org/ -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | with open('README.md', 'r', encoding='utf-8') as readme_file: 4 | long_description = readme_file.read() 5 | 6 | setup( 7 | version='0.1.4', 8 | description='Python client library for the Tychos API.', 9 | long_description=long_description, 10 | long_description_content_type='text/markdown', 11 | python_requires='>=3.6', 12 | install_requires=[ 13 | 'requests', 14 | ], 15 | entry_points={ 16 | 'console_scripts': [ 17 | 'tychos-cli=tychos.cli:main', 18 | ], 19 | }, 20 | ) -------------------------------------------------------------------------------- /tests/test_tychos.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from tychos import Tychos, VectorDataStore, _Vector 3 | import os 4 | from dotenv import load_dotenv 5 | 6 | load_dotenv() 7 | 8 | class TestTychos(unittest.TestCase): 9 | def test_set_api_key(self): 10 | tychos = Tychos() 11 | tychos.api_key = os.getenv('USER_TYCHOS_API_KEY') 12 | self.assertEqual(tychos.api_key, os.getenv('USER_TYCHOS_API_KEY')) 13 | 14 | def test_vector_data_store(self): 15 | tychos = Tychos() 16 | tychos.api_key = os.getenv('USER_TYCHOS_API_KEY') 17 | vector_data_store = VectorDataStore(api_key=tychos.api_key) 18 | result = vector_data_store.list() 19 | self.assertIsNotNone(result) 20 | self.assertIsInstance(result, dict) 21 | 22 | def test_vector_data_store_query(self): 23 | tychos = Tychos() 24 | tychos.api_key = os.getenv('USER_TYCHOS_API_KEY') 25 | vector_data_store = VectorDataStore(api_key=tychos.api_key) 26 | result = vector_data_store.query('name', 'query_string', 10) 27 | self.assertIsNotNone(result) 28 | self.assertIsInstance(result, dict) 29 | 30 | def test_vector(self): 31 | tychos = Tychos() 32 | tychos.api_key = os.getenv('USER_TYCHOS_API_KEY') 33 | vector = _Vector(api_key=tychos.api_key) 34 | result = vector.create('text_embedding', 'input_text', 'text-embedding-ada-002') 35 | self.assertIsNotNone(result) 36 | self.assertIsInstance(result, list) 37 | 38 | if __name__ == '__main__': 39 | unittest.main() 40 | -------------------------------------------------------------------------------- /tychos.egg-info/dependency_links.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /tychos.egg-info/entry_points.txt: -------------------------------------------------------------------------------- 1 | [console_scripts] 2 | tychos-cli = tychos.cli:main 3 | -------------------------------------------------------------------------------- /tychos.egg-info/requires.txt: -------------------------------------------------------------------------------- 1 | requests 2 | -------------------------------------------------------------------------------- /tychos.egg-info/top_level.txt: -------------------------------------------------------------------------------- 1 | tychos 2 | -------------------------------------------------------------------------------- /tychos/__init__.py: -------------------------------------------------------------------------------- 1 | from .vector_data_store import VectorDataStore 2 | from .vector import _Vector 3 | 4 | import os 5 | 6 | class Tychos: 7 | @property 8 | def api_key(self): 9 | return os.getenv('TYCHOS_API_KEY') 10 | 11 | @api_key.setter 12 | def api_key(self, value): 13 | os.environ['TYCHOS_API_KEY'] = value -------------------------------------------------------------------------------- /tychos/__pycache__/__init__.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tychos-ai/tychos-python/78c9fc2c45ee93f8fbfb6775fad83e64a87260fc/tychos/__pycache__/__init__.cpython-311.pyc -------------------------------------------------------------------------------- /tychos/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tychos-ai/tychos-python/78c9fc2c45ee93f8fbfb6775fad83e64a87260fc/tychos/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /tychos/__pycache__/cli.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tychos-ai/tychos-python/78c9fc2c45ee93f8fbfb6775fad83e64a87260fc/tychos/__pycache__/cli.cpython-39.pyc -------------------------------------------------------------------------------- /tychos/__pycache__/config.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tychos-ai/tychos-python/78c9fc2c45ee93f8fbfb6775fad83e64a87260fc/tychos/__pycache__/config.cpython-39.pyc -------------------------------------------------------------------------------- /tychos/__pycache__/vector.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tychos-ai/tychos-python/78c9fc2c45ee93f8fbfb6775fad83e64a87260fc/tychos/__pycache__/vector.cpython-311.pyc -------------------------------------------------------------------------------- /tychos/__pycache__/vector_data_store.cpython-311.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tychos-ai/tychos-python/78c9fc2c45ee93f8fbfb6775fad83e64a87260fc/tychos/__pycache__/vector_data_store.cpython-311.pyc -------------------------------------------------------------------------------- /tychos/cli.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import argparse 3 | from .vector_data_store import VectorDataStore 4 | 5 | def query(args): 6 | tds = VectorDataStore(api_key=args.api_key) 7 | result = tds.query(args.name, args.query_string, args.limit) 8 | print(result) 9 | 10 | def main(): 11 | parser = argparse.ArgumentParser(description='Tychos CLI') 12 | subparsers = parser.add_subparsers() 13 | 14 | query_parser = subparsers.add_parser('query') 15 | query_parser.add_argument('--api-key', required=True, help='Tychos API key') 16 | query_parser.add_argument('--name', required=True, help='name of the index to query') 17 | query_parser.add_argument('--query-string', required=True, help='query string to search against index') 18 | query_parser.add_argument('--limit', type=int, default=5, help='number of results to return') 19 | query_parser.set_defaults(func=query) 20 | 21 | args = parser.parse_args() 22 | if 'func' in args: 23 | # Call the function associated with the provided sub-command 24 | args.func(args) 25 | else: 26 | # No sub-command was provided 27 | parser.print_help() 28 | sys.exit(1) 29 | 30 | if __name__ == "__main__": 31 | main() -------------------------------------------------------------------------------- /tychos/config.py: -------------------------------------------------------------------------------- 1 | class TychosConfig: 2 | def __init__(self): 3 | self._api_key = None 4 | 5 | def __get__(self, instance, owner): 6 | return self._api_key 7 | 8 | def __set__(self, instance, value): 9 | self._api_key = value -------------------------------------------------------------------------------- /tychos/helpers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tychos-ai/tychos-python/78c9fc2c45ee93f8fbfb6775fad83e64a87260fc/tychos/helpers/__init__.py -------------------------------------------------------------------------------- /tychos/helpers/validation_checks.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | def validate_query_filter(query_filter): 4 | valid_operators = ['$eq', '$ne', '$gt', '$gte', '$lt', '$lte', '$in', '$nin'] 5 | date_pattern = re.compile(r'\d{4}-\d{2}-\d{2}') # Pattern for ISO 8601 dates 6 | 7 | if not isinstance(query_filter, dict): 8 | raise ValueError("query_filter must be a dictionary.") 9 | 10 | for key, value in query_filter.items(): 11 | # Validate that value is a dictionary with a valid operator 12 | if not isinstance(value, dict): 13 | raise ValueError("The value in query_filter must be a dictionary.") 14 | if len(value) != 1: 15 | raise ValueError("The value in query_filter must contain exactly one operator.") 16 | 17 | operator, operand = list(value.items())[0] 18 | if operator not in valid_operators: 19 | raise ValueError(f"Invalid operator '{operator}' in query filter. Use one of the following operators: {', '.join(valid_operators)}") 20 | 21 | # Validate the operand based on the operator 22 | if operator in ['$eq', '$ne']: 23 | if not isinstance(operand, (int, str, bool)): 24 | raise ValueError(f"The operand of '{operator}' must be an integer, string, or boolean.") 25 | elif operator in ['$gt', '$gte', '$lt', '$lte']: 26 | if isinstance(operand, int): 27 | continue 28 | elif isinstance(operand, str): 29 | if not date_pattern.fullmatch(operand): 30 | raise ValueError(f"The operand of '{operator}' must be an integer or date string in ISO 8601 format.") 31 | else: 32 | raise ValueError(f"The operand of '{operator}' must be an integer or date string.") 33 | elif operator in ['$in', '$nin']: 34 | if not isinstance(operand, list) or not all(isinstance(i, (int, str)) for i in operand): 35 | raise ValueError(f"The operand of '{operator}' must be a list of integers or strings.") 36 | return True -------------------------------------------------------------------------------- /tychos/vector.py: -------------------------------------------------------------------------------- 1 | import os 2 | import requests 3 | 4 | class _Vector: 5 | def __init__(self, api_key=None): 6 | self.api_key = api_key or os.getenv('TYCHOS_API_KEY') 7 | self.base_url = 'https://api.tychos.ai/' 8 | 9 | def create(self, type, input_text, model, model_provider_key=None): 10 | if self.api_key is None: 11 | raise ValueError("API key not set. Please set the API key using 'tychos.api_key = '. If you need to create an API key, you can go so at tychos.ai") 12 | if type == "text_embedding": 13 | if model == "text-embedding-ada-002": 14 | try: 15 | url = f'{self.base_url}v1/vector/create' 16 | headers = {'api_key': self.api_key} 17 | payload = { 18 | 'model_provider_key': model_provider_key, 19 | 'input': input_text, 20 | 'model': model, 21 | } 22 | response = requests.post(url=url, headers=headers, json=payload) 23 | 24 | # error handling 25 | response.raise_for_status() 26 | 27 | return response.json() 28 | except Exception as e: 29 | print(e) 30 | return None 31 | else: 32 | print("Model not currently supported, try text-embedding-ada-002") 33 | return None 34 | else: 35 | print("Type not currently supported, try text_embedding") 36 | return None 37 | -------------------------------------------------------------------------------- /tychos/vector_data_store.py: -------------------------------------------------------------------------------- 1 | import os 2 | import requests 3 | from .vector import _Vector 4 | from .helpers.validation_checks import validate_query_filter 5 | 6 | class VectorDataStore: 7 | def __init__(self, api_key=None): 8 | self.api_key = api_key or os.getenv('TYCHOS_API_KEY') 9 | self.base_url = 'https://api.tychos.ai/' 10 | self.vector = _Vector(api_key=self.api_key) 11 | 12 | def query(self, name, query_string, limit, query_filter=None): 13 | if self.api_key is None: 14 | raise ValueError("API key not set. Please set the API key using 'tychos.api_key = '. If you need to create an API key, you can go so at tychos.ai") 15 | # vectorize query string 16 | query_vector = self.vector.create( 17 | type="text_embedding", 18 | input_text=query_string, 19 | model="text-embedding-ada-002" 20 | ) 21 | 22 | # validate index name 23 | available_indices = ['pub-med-abstracts', 'arxiv-abstracts', 'us-patents', 'biorxiv', 'medrxiv'] 24 | if not isinstance(name, list): 25 | name = [name] 26 | invalid_names = [n for n in name if n not in available_indices] 27 | if invalid_names: 28 | raise ValueError(f"Invalid index name(s): {', '.join(invalid_names)}. The current available datasets are: {', '.join(available_indices)}") 29 | 30 | # send query request to vector data store 31 | url = f'{self.base_url}v1/vector_data_store/query' 32 | headers = {'api_key': self.api_key} 33 | payload = { 34 | 'name': name, 35 | 'query_vector': query_vector, 36 | 'top': limit, 37 | } 38 | if query_filter is not None: 39 | validate_query_filter(query_filter) 40 | payload['query_filter'] = query_filter 41 | response = requests.post(url=url, headers=headers, json=payload) 42 | 43 | # error handling 44 | response.raise_for_status() 45 | 46 | return response.json() 47 | 48 | def list(self): 49 | if self.api_key is None: 50 | raise ValueError("API key not set. Please set the API key using 'tychos.api_key = '. If you need to create an API key, you can go so at tychos.ai") 51 | url = f'{self.base_url}v1/vector_data_store/list' 52 | headers = {'api_key': self.api_key} 53 | response = requests.get(url=url, headers=headers) 54 | 55 | # error handling 56 | response.raise_for_status() 57 | 58 | return response.json() --------------------------------------------------------------------------------