├── .flake8
├── .github
    └── workflows
    │   └── publish-to-pypi.yml
├── .gitignore
├── .isort.cfg
├── .readthedocs.yaml
├── LICENSE
├── README.md
├── agent_search
    ├── __init__.py
    ├── app
    │   └── server.py
    ├── core
    │   ├── __init__.py
    │   ├── client.py
    │   ├── search_types.py
    │   └── utils.py
    ├── providers
    │   ├── __init__.py
    │   └── sciphi.py
    ├── scripts
    │   ├── populate_qdrant_from_postgres.py
    │   ├── run_rag.py
    │   └── run_search.py
    └── search
    │   ├── __init__.py
    │   └── base.py
├── data
    └── config.ini
├── docs
    ├── Makefile
    ├── README.md
    ├── make.bat
    ├── requirements-docs.txt
    └── source
    │   ├── api
    │       └── main.rst
    │   ├── assets
    │       └── logos
    │       │   └── sciphi.png
    │   ├── conf.py
    │   ├── index.rst
    │   ├── python_client
    │       └── main.rst
    │   └── setup
    │       ├── installation.rst
    │       └── quick_start.rst
├── examples
    └── recursive_agent_search.py
└── pyproject.toml


/.flake8:
--------------------------------------------------------------------------------
1 | [flake8]
2 | exclude = playground/*
3 | ignore = E501, W503, E203, F541, W293, W291, E266
4 | 


--------------------------------------------------------------------------------
/.github/workflows/publish-to-pypi.yml:
--------------------------------------------------------------------------------
 1 | name: Publish to PyPI
 2 | 
 3 | on:
 4 |   push:
 5 |     tags:
 6 |       - "*"
 7 | 
 8 | jobs:
 9 |   publish:
10 |     runs-on: ubuntu-latest
11 |     steps:
12 |       - name: Checkout code
13 |         uses: actions/checkout@v2
14 | 
15 |       - name: Set up Python
16 |         uses: actions/setup-python@v2
17 |         with:
18 |           python-version: 3.9
19 | 
20 |       - name: Install poetry
21 |         run: pip install poetry
22 | 
23 |       - name: Build and publish
24 |         run: |
25 |           poetry build
26 |           poetry publish --username __token__ --password ${{ secrets.PYPI_API_TOKEN }}
27 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # local data
2 | data/
3 | playground/
4 | **/__pycache__
5 | dist/
6 | test/
7 | 


--------------------------------------------------------------------------------
/.isort.cfg:
--------------------------------------------------------------------------------
1 | [settings]
2 | profile = black
3 | multi_line_output = 3
4 | include_trailing_comma = true
5 | force_grid_wrap = 0
6 | use_parentheses = true
7 | ensure_newline_before_comments = true
8 | line_length = 79
9 | sections = FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER


--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
 1 | # Read the Docs configuration file
 2 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
 3 | 
 4 | version: 2
 5 | 
 6 | build:
 7 |   os: ubuntu-22.04
 8 |   tools:
 9 |     python: "3.8"
10 | 
11 | sphinx:
12 |   configuration: docs/source/conf.py
13 | 
14 | # If using Sphinx, optionally build your docs in additional formats such as PDF
15 | formats:
16 |   - pdf
17 | 
18 | # Optionally declare the Python requirements required to build your docs
19 | python:
20 |   install:
21 |     - requirements: docs/requirements-docs.txt
22 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 | 1.  Definitions.
  8 | 
  9 |     "License" shall mean the terms and conditions for use, reproduction,
 10 |     and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |     "Licensor" shall mean the copyright owner or entity authorized by
 13 |     the copyright owner that is granting the License.
 14 | 
 15 |     "Legal Entity" shall mean the union of the acting entity and all
 16 |     other entities that control, are controlled by, or are under common
 17 |     control with that entity. For the purposes of this definition,
 18 |     "control" means (i) the power, direct or indirect, to cause the
 19 |     direction or management of such entity, whether by contract or
 20 |     otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |     outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |     "You" (or "Your") shall mean an individual or Legal Entity
 24 |     exercising permissions granted by this License.
 25 | 
 26 |     "Source" form shall mean the preferred form for making modifications,
 27 |     including but not limited to software source code, documentation
 28 |     source, and configuration files.
 29 | 
 30 |     "Object" form shall mean any form resulting from mechanical
 31 |     transformation or translation of a Source form, including but
 32 |     not limited to compiled object code, generated documentation,
 33 |     and conversions to other media types.
 34 | 
 35 |     "Work" shall mean the work of authorship, whether in Source or
 36 |     Object form, made available under the License, as indicated by a
 37 |     copyright notice that is included in or attached to the work
 38 |     (an example is provided in the Appendix below).
 39 | 
 40 |     "Derivative Works" shall mean any work, whether in Source or Object
 41 |     form, that is based on (or derived from) the Work and for which the
 42 |     editorial revisions, annotations, elaborations, or other modifications
 43 |     represent, as a whole, an original work of authorship. For the purposes
 44 |     of this License, Derivative Works shall not include works that remain
 45 |     separable from, or merely link (or bind by name) to the interfaces of,
 46 |     the Work and Derivative Works thereof.
 47 | 
 48 |     "Contribution" shall mean any work of authorship, including
 49 |     the original version of the Work and any modifications or additions
 50 |     to that Work or Derivative Works thereof, that is intentionally
 51 |     submitted to Licensor for inclusion in the Work by the copyright owner
 52 |     or by an individual or Legal Entity authorized to submit on behalf of
 53 |     the copyright owner. For the purposes of this definition, "submitted"
 54 |     means any form of electronic, verbal, or written communication sent
 55 |     to the Licensor or its representatives, including but not limited to
 56 |     communication on electronic mailing lists, source code control systems,
 57 |     and issue tracking systems that are managed by, or on behalf of, the
 58 |     Licensor for the purpose of discussing and improving the Work, but
 59 |     excluding communication that is conspicuously marked or otherwise
 60 |     designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |     "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |     on behalf of whom a Contribution has been received by Licensor and
 64 |     subsequently incorporated within the Work.
 65 | 
 66 | 2.  Grant of Copyright License. Subject to the terms and conditions of
 67 |     this License, each Contributor hereby grants to You a perpetual,
 68 |     worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |     copyright license to reproduce, prepare Derivative Works of,
 70 |     publicly display, publicly perform, sublicense, and distribute the
 71 |     Work and such Derivative Works in Source or Object form.
 72 | 
 73 | 3.  Grant of Patent License. Subject to the terms and conditions of
 74 |     this License, each Contributor hereby grants to You a perpetual,
 75 |     worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |     (except as stated in this section) patent license to make, have made,
 77 |     use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |     where such license applies only to those patent claims licensable
 79 |     by such Contributor that are necessarily infringed by their
 80 |     Contribution(s) alone or by combination of their Contribution(s)
 81 |     with the Work to which such Contribution(s) was submitted. If You
 82 |     institute patent litigation against any entity (including a
 83 |     cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |     or a Contribution incorporated within the Work constitutes direct
 85 |     or contributory patent infringement, then any patent licenses
 86 |     granted to You under this License for that Work shall terminate
 87 |     as of the date such litigation is filed.
 88 | 
 89 | 4.  Redistribution. You may reproduce and distribute copies of the
 90 |     Work or Derivative Works thereof in any medium, with or without
 91 |     modifications, and in Source or Object form, provided that You
 92 |     meet the following conditions:
 93 | 
 94 |     (a) You must give any other recipients of the Work or
 95 |     Derivative Works a copy of this License; and
 96 | 
 97 |     (b) You must cause any modified files to carry prominent notices
 98 |     stating that You changed the files; and
 99 | 
100 |     (c) You must retain, in the Source form of any Derivative Works
101 |     that You distribute, all copyright, patent, trademark, and
102 |     attribution notices from the Source form of the Work,
103 |     excluding those notices that do not pertain to any part of
104 |     the Derivative Works; and
105 | 
106 |     (d) If the Work includes a "NOTICE" text file as part of its
107 |     distribution, then any Derivative Works that You distribute must
108 |     include a readable copy of the attribution notices contained
109 |     within such NOTICE file, excluding those notices that do not
110 |     pertain to any part of the Derivative Works, in at least one
111 |     of the following places: within a NOTICE text file distributed
112 |     as part of the Derivative Works; within the Source form or
113 |     documentation, if provided along with the Derivative Works; or,
114 |     within a display generated by the Derivative Works, if and
115 |     wherever such third-party notices normally appear. The contents
116 |     of the NOTICE file are for informational purposes only and
117 |     do not modify the License. You may add Your own attribution
118 |     notices within Derivative Works that You distribute, alongside
119 |     or as an addendum to the NOTICE text from the Work, provided
120 |     that such additional attribution notices cannot be construed
121 |     as modifying the License.
122 | 
123 |     You may add Your own copyright statement to Your modifications and
124 |     may provide additional or different license terms and conditions
125 |     for use, reproduction, or distribution of Your modifications, or
126 |     for any such Derivative Works as a whole, provided Your use,
127 |     reproduction, and distribution of the Work otherwise complies with
128 |     the conditions stated in this License.
129 | 
130 | 5.  Submission of Contributions. Unless You explicitly state otherwise,
131 |     any Contribution intentionally submitted for inclusion in the Work
132 |     by You to the Licensor shall be under the terms and conditions of
133 |     this License, without any additional terms or conditions.
134 |     Notwithstanding the above, nothing herein shall supersede or modify
135 |     the terms of any separate license agreement you may have executed
136 |     with Licensor regarding such Contributions.
137 | 
138 | 6.  Trademarks. This License does not grant permission to use the trade
139 |     names, trademarks, service marks, or product names of the Licensor,
140 |     except as required for reasonable and customary use in describing the
141 |     origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 | 7.  Disclaimer of Warranty. Unless required by applicable law or
144 |     agreed to in writing, Licensor provides the Work (and each
145 |     Contributor provides its Contributions) on an "AS IS" BASIS,
146 |     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |     implied, including, without limitation, any warranties or conditions
148 |     of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |     PARTICULAR PURPOSE. You are solely responsible for determining the
150 |     appropriateness of using or redistributing the Work and assume any
151 |     risks associated with Your exercise of permissions under this License.
152 | 
153 | 8.  Limitation of Liability. In no event and under no legal theory,
154 |     whether in tort (including negligence), contract, or otherwise,
155 |     unless required by applicable law (such as deliberate and grossly
156 |     negligent acts) or agreed to in writing, shall any Contributor be
157 |     liable to You for damages, including any direct, indirect, special,
158 |     incidental, or consequential damages of any character arising as a
159 |     result of this License or out of the use or inability to use the
160 |     Work (including but not limited to damages for loss of goodwill,
161 |     work stoppage, computer failure or malfunction, or any and all
162 |     other commercial damages or losses), even if such Contributor
163 |     has been advised of the possibility of such damages.
164 | 
165 | 9.  Accepting Warranty or Additional Liability. While redistributing
166 |     the Work or Derivative Works thereof, You may choose to offer,
167 |     and charge a fee for, acceptance of support, warranty, indemnity,
168 |     or other liability obligations and/or rights consistent with this
169 |     License. However, in accepting such obligations, You may act only
170 |     on Your own behalf and on Your sole responsibility, not on behalf
171 |     of any other Contributor, and only if You agree to indemnify,
172 |     defend, and hold each Contributor harmless for any liability
173 |     incurred by, or claims asserted against, such Contributor by reason
174 |     of your accepting any such warranty or additional liability.
175 | 
176 | END OF TERMS AND CONDITIONS
177 | 
178 | APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 | Copyright 2023 Emergent AGI Inc.
190 | 
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # AgentSearch: A framework for powering search agents and enabling customizable local search.
  2 | 
  3 | ![AgentSearch Banner](https://github.com/SciPhi-AI/agent-search/assets/68796651/8d0424e6-84e3-42f6-9893-3d63f9b2a58d)
  4 | 
  5 | AgentSearch is a framework for powering search agents by seamlessly integrating LLM technologies from various providers with different search engines. This integration enables search agents to perform a wide range of functions through Retrieval-Augmented Generation (RAG), including summarizing search results, generating new queries, and retrieving detailed downstream results.
  6 | 
  7 | ## Features of AgentSearch
  8 | 
  9 | - **Search Agent Integration**: Effortlessly build a search agent by connecting any search-specialized LLM, such as [Sensei-7B](https://huggingface.co/SciPhi/Sensei-7B-V1), with a supported search engine.
 10 | - **Customizable Search**: Utilize the [AgentSearch dataset](https://huggingface.co/datasets/SciPhi/AgentSearch-V1) in conjunction with this framework to deploy a customizable local search engine.
 11 | - **API Endpoint Integration**: Seamlessly integrate with a variety of hosted provider APIs for diverse search solutions, offering ease of use and flexibility, including Bing, SERP API, and AgentSearch. Additionally, support is provided for LLMs from SciPhi, HuggingFace, OpenAI, Anthropic, and more.
 12 | 
 13 | ## Quickstart Guide
 14 | 
 15 | ### Installation
 16 | 
 17 | ```bash
 18 | pip install agent-search
 19 | ```
 20 | 
 21 | ### Configuration
 22 | 
 23 | Get your free API key from [SciPhi](https://www.sciphi.ai/signup) and set it in your environment:
 24 | 
 25 | ```bash
 26 | export SCIPHI_API_KEY=$MY_SCIPHI_API_KEY
 27 | ```
 28 | 
 29 | ### Usage
 30 | 
 31 | Call a pre-configured search agent endpoint:
 32 | 
 33 | ```python
 34 | # Requires SCIPHI_API_KEY in the environment
 35 | from agent_search import SciPhi
 36 | 
 37 | client = SciPhi()
 38 | 
 39 | # Search, then summarize result and generate related queries
 40 | agent_summary = client.get_search_rag_response(query='latest news', search_provider='bing', llm_model='SciPhi/Sensei-7B-V1')
 41 | print(agent_summary)
 42 | # { 'response': '...', 'other_queries': '...', 'search_results': '...' }
 43 | ```
 44 | 
 45 | Standalone searches and from the AgentSearch search engine are supported:
 46 | 
 47 | ```python
 48 | # Requires SCIPHI_API_KEY in the environment
 49 | from agent_search import SciPhi
 50 | 
 51 | client = SciPhi()
 52 | 
 53 | # Perform a search
 54 | search_response = client.search(query='Quantum Field Theory', search_provider='agent-search')
 55 | 
 56 | print(search_response)
 57 | # [{ 'score': '.89', 'url': 'https://...', 'metadata': {...} }]
 58 | ```
 59 | 
 60 | Code your own custom search agent workflow:
 61 | 
 62 | ```python
 63 | # Requires SCIPHI_API_KEY in the environment
 64 | from agent_search import SciPhi
 65 | 
 66 | client = SciPhi()
 67 | 
 68 | # Specify instructions for the task
 69 | instruction = "Your task is to perform retrieval augmented generation (RAG) over the given query and search results. Return your answer in a json format that includes a summary of the search results and a list of related queries."
 70 | query = "What is Fermat's Last Theorem?"
 71 | 
 72 | # construct search context
 73 | search_response = client.search(query=query, search_provider='agent-search')
 74 | search_context = "\n\n".join(
 75 |       f"{idx + 1}. Title: {item['title']}\nURL: {item['url']}\nText: {item['text']}"
 76 |       for idx, item in enumerate(search_response)
 77 | ).encode('utf-8')
 78 | 
 79 | # Prefix to enforce a JSON response 
 80 | json_response_prefix = '{"summary":'
 81 | 
 82 | # Prepare a prompt
 83 | formatted_prompt = f"### Instruction:{instruction}\n\nQuery:\n{query}\n\nSearch Results:\n${search_context}\n\nQuery:\n{query}\n### Response:\n{json_response_prefix}",
 84 | 
 85 | # Generate a completion with Sensei-7B-V1
 86 | completion = json_response_prefix + client.completion(formatted_prompt, llm_model_name="SciPhi/Sensei-7B-V1")
 87 | 
 88 | print(completion)
 89 | # {
 90 | #   "summary":  "\nFermat's Last Theorem is a mathematical proposition first prop ... ",
 91 | #   "other_queries": ["The role of elliptic curves in the proof of Fermat's Last Theorem", ...]
 92 | # }
 93 | ```
 94 | 
 95 | ## Community & Support
 96 | 
 97 | - **Engage with Us:** Join our [Discord community](https://discord.gg/mN4kWbsgRu) for discussions and updates.
 98 | - **Feedback & Inquiries:** Contact us via email for personalized support.
 99 | 
100 | ### Additional Notes
101 | 
102 | - Execute commands from the root directory of the AgentSearch project.
103 | - User Guide coming soon!
104 | 


--------------------------------------------------------------------------------
/agent_search/__init__.py:
--------------------------------------------------------------------------------
1 | from agent_search.providers import SciPhi
2 | 
3 | __all__ = ["SciPhi"]
4 | 


--------------------------------------------------------------------------------
/agent_search/app/server.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import time
  3 | from typing import Optional
  4 | 
  5 | from pydantic import BaseModel
  6 | 
  7 | from agent_search.core.utils import load_config, select_top_urls
  8 | from agent_search.search import WebSearchEngine
  9 | 
 10 | # Attempt to import uvicorn and FastAPI
 11 | try:
 12 |     import uvicorn
 13 |     from fastapi import FastAPI, HTTPException
 14 | except ImportError as e:
 15 |     raise ImportError(
 16 |         f"Error: {e}, Note - both uvicorn and FastAPI are required to run the server."
 17 |     )
 18 | 
 19 | 
 20 | logger = logging.getLogger(__name__)
 21 | 
 22 | 
 23 | class SearchServer:
 24 |     def __init__(self):
 25 |         self.client = WebSearchEngine()
 26 | 
 27 |     def run(
 28 |         self,
 29 |         query="What is a lagrangian?",
 30 |         limit_broad_results=1_000,
 31 |         limit_deduped_url_results=50,
 32 |         limit_hierarchical_url_results=50,
 33 |         limit_final_pagerank_results=20,
 34 |         url_contains_filter=None,
 35 |     ):
 36 |         """Run a search query using the WebSearchEngine client"""
 37 | 
 38 |         query_vector = self.client.get_query_vector(query)
 39 | 
 40 |         broad_results = self.client.similarity_search(
 41 |             query_vector=query_vector, limit=limit_broad_results
 42 |         )
 43 | 
 44 |         if not url_contains_filter:
 45 |             url_contains_filter = []
 46 | 
 47 |         deduped_url_results = select_top_urls(
 48 |             broad_results,
 49 |             max_urls=limit_deduped_url_results,
 50 |             url_contains=url_contains_filter,
 51 |         )
 52 | 
 53 |         hierarchical_url_results = (
 54 |             self.client.hierarchical_similarity_reranking(
 55 |                 query_vector=query_vector,
 56 |                 urls=deduped_url_results,
 57 |                 limit=limit_hierarchical_url_results,
 58 |             )
 59 |         )
 60 | 
 61 |         pagerank_reranked_results = self.client.pagerank_reranking(
 62 |             hierarchical_url_results
 63 |         )[:limit_final_pagerank_results]
 64 | 
 65 |         return pagerank_reranked_results
 66 | 
 67 | 
 68 | class SearchQuery(BaseModel):
 69 |     """A search query data model"""
 70 | 
 71 |     query: str
 72 |     limit_broad_results: Optional[int] = 1_000
 73 |     limit_deduped_url_results: Optional[int] = 100
 74 |     limit_hierarchical_url_results: Optional[int] = 25
 75 |     limit_final_pagerank_results: Optional[int] = 10
 76 | 
 77 | 
 78 | app = FastAPI()
 79 | search_runner = SearchServer()
 80 | 
 81 | 
 82 | def check_limits(query: SearchQuery):
 83 |     """Check if the limit parameters exceed three times their default values"""
 84 |     if query.limit_broad_results > 3 * 1_000:
 85 |         raise ValueError(
 86 |             "limit_broad_results exceeds 3 times its default value"
 87 |         )
 88 |     if query.limit_deduped_url_results > 3 * 100:
 89 |         raise ValueError(
 90 |             "limit_deduped_url_results exceeds 3 times its default value"
 91 |         )
 92 |     if query.limit_hierarchical_url_results > 3 * 25:
 93 |         raise ValueError(
 94 |             "limit_hierarchical_url_results exceeds 3 times its default value"
 95 |         )
 96 |     if query.limit_final_pagerank_results > 3 * 10:
 97 |         raise ValueError(
 98 |             "limit_final_pagerank_results exceeds 3 times its default value"
 99 |         )
100 | 
101 | 
102 | @app.post("/search")
103 | def run_search(query: SearchQuery):
104 |     """Run a search query"""
105 |     try:
106 |         check_limits(query)
107 |         results = search_runner.run(
108 |             query=query.query,
109 |             limit_broad_results=query.limit_broad_results,
110 |             limit_deduped_url_results=query.limit_deduped_url_results,
111 |             limit_hierarchical_url_results=query.limit_hierarchical_url_results,
112 |             limit_final_pagerank_results=query.limit_final_pagerank_results,
113 |         )
114 |         return {"results": results}
115 |     except ValueError as e:
116 |         logger.error(f"ValueError {e} = ", e)
117 |         raise HTTPException(status_code=400, detail=str(e))
118 |     except Exception as e:
119 |         logger.error(f"Exception {e} = ", e)
120 |         raise HTTPException(status_code=500, detail=str(e))
121 | 
122 | 
123 | @app.get("/health")
124 | def health_check():
125 |     """Health check endpoint"""
126 |     return {"status": "ok"}
127 | 
128 | 
129 | if __name__ == "__main__":
130 |     config = load_config()["server"]
131 |     logging.basicConfig(level=config["log_level"])
132 |     uvicorn.run(app, host=config["host"], port=int(config["port"]))
133 | 


--------------------------------------------------------------------------------
/agent_search/core/__init__.py:
--------------------------------------------------------------------------------
1 | from .client import AgentSearchClient
2 | from .search_types import AgentSearchResult
3 | 
4 | __all__ = ["AgentSearchClient", "AgentSearchResult"]
5 | 


--------------------------------------------------------------------------------
/agent_search/core/client.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from typing import List, Optional
 3 | 
 4 | import requests
 5 | 
 6 | from .search_types import AgentSearchResult
 7 | 
 8 | 
 9 | class AgentSearchClient:
10 |     def __init__(
11 |         self,
12 |         api_base: Optional[str] = None,
13 |         auth_token: Optional[str] = None,
14 |     ):
15 |         self.api_base = (
16 |             api_base or os.getenv("SCIPHI_API_BASE") or "https://api.sciphi.ai"
17 |         )
18 |         self.auth_token = auth_token or os.getenv("SCIPHI_API_KEY")
19 | 
20 |         if not self.auth_token:
21 |             raise ValueError(
22 |                 "No authorization token provided and SCIPHI_API_KEY environment variable is not set."
23 |             )
24 | 
25 |     def search(
26 |         self,
27 |         query: str,
28 |         limit_broad_results: int = 1_000,
29 |         limit_deduped_url_results: int = 100,
30 |         limit_hierarchical_url_results: int = 25,
31 |         limit_final_pagerank_results: int = 10,
32 |     ) -> List[AgentSearchResult]:
33 |         headers = {
34 |             "Authorization": f"Bearer {self.auth_token}",
35 |             "Content-Type": "application/json",
36 |         }
37 |         payload = {
38 |             "query": query,
39 |             "limit_broad_results": limit_broad_results,
40 |             "limit_deduped_url_results": limit_deduped_url_results,
41 |             "limit_hierarchical_url_results": limit_hierarchical_url_results,
42 |             "limit_final_pagerank_results": limit_final_pagerank_results,
43 |         }
44 |         response = requests.post(
45 |             f"{self.api_base}/search", headers=headers, json=payload
46 |         )
47 |         response.raise_for_status()  # Raises an HTTPError if the HTTP request returned an unsuccessful status code
48 |         results = response.json()['results']
49 | 
50 |         serp_results = [
51 |             AgentSearchResult.from_dict(result) for result in results
52 |         ]
53 | 
54 |         return serp_results
55 | 


--------------------------------------------------------------------------------
/agent_search/core/search_types.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Optional
 2 | 
 3 | from pydantic import BaseModel
 4 | 
 5 | 
 6 | class AgentSearchResult(BaseModel):
 7 |     """A dataclass to store the search result"""
 8 | 
 9 |     score: float
10 |     url: str
11 |     title: Optional[str]
12 |     dataset: Optional[str]
13 |     # TODO - Add dict(str, [str, float, ..]) validation
14 |     metadata: Any
15 |     text: str
16 | 
17 |     def __init__(self, **data: Any):
18 |         super().__init__(**data)
19 |         if self.title and self.title == self.text[0 : len(self.title)]:
20 |             self.text = self.text[len(self.title) :]
21 |         self.text = self.text.strip()
22 | 
23 |     def to_string_dict(self) -> dict:
24 |         """Returns a dictionary representation with all values as strings."""
25 |         return {
26 |             "score": str(self.score),
27 |             "url": self.url,
28 |             "title": self.title,
29 |             "dataset": self.dataset,
30 |             "metadata": self.metadata,
31 |             "text": self.text,
32 |         }
33 | 
34 |     @classmethod
35 |     def from_dict(cls, data: dict):
36 |         return cls(**data)
37 | 


--------------------------------------------------------------------------------
/agent_search/core/utils.py:
--------------------------------------------------------------------------------
 1 | import configparser
 2 | import os
 3 | from typing import List, Optional
 4 | 
 5 | import numpy as np
 6 | 
 7 | from agent_search.core import AgentSearchResult
 8 | 
 9 | 
10 | def select_top_urls(
11 |     ordered_points: List[AgentSearchResult],
12 |     max_urls: int = 10,
13 |     url_contains: Optional[List[str]] = None,
14 | ) -> List[str]:
15 |     """A function to return the top unique URLs from the given poitns results."""
16 |     if not url_contains:
17 |         url_contains = []
18 | 
19 |     top_urls = set([])
20 |     for point in ordered_points:
21 |         url = point.url
22 |         if url in top_urls:
23 |             continue
24 |         url_contains_match = False if url_contains else True
25 |         for url_contain in url_contains:
26 |             if url_contain in url:
27 |                 url_contains_match = True
28 |                 break
29 |         if not url_contains_match:
30 |             continue
31 |         top_urls.add(point.url)
32 |         if len(top_urls) >= max_urls:
33 |             break
34 | 
35 |     return list(top_urls)
36 | 
37 | 
38 | def cosine_similarity(v1: np.ndarray, v2: np.ndarray) -> float:
39 |     """Compute the cosine similarity between two vectors."""
40 |     dot_product = np.dot(v1, v2)
41 |     norm_v1 = np.linalg.norm(v1)
42 |     norm_v2 = np.linalg.norm(v2)
43 |     return dot_product / (norm_v1 * norm_v2)
44 | 
45 | 
46 | def get_data_path() -> str:
47 |     return os.path.join(
48 |         os.path.dirname(__file__),
49 |         "..",
50 |         "..",
51 |         "data",
52 |     )
53 | 
54 | 
55 | def load_config(config_dir: Optional[str] = None) -> configparser.ConfigParser:
56 |     """Load the configuration file."""
57 |     config = configparser.ConfigParser()
58 |     if not config_dir:
59 |         config_dir = get_data_path()
60 |     config.read(os.path.join(config_dir, "config.ini"))
61 |     return config
62 | 


--------------------------------------------------------------------------------
/agent_search/providers/__init__.py:
--------------------------------------------------------------------------------
1 | from .sciphi import SciPhi
2 | 
3 | __all__ = ["SciPhi"]
4 | 


--------------------------------------------------------------------------------
/agent_search/providers/sciphi.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | import logging
  3 | import os
  4 | import time
  5 | from typing import Any, Dict, List, Optional
  6 | 
  7 | import httpx
  8 | from pydantic import BaseModel, Field
  9 | 
 10 | logger = logging.getLogger(__name__)
 11 | 
 12 | 
 13 | class SearchResult(BaseModel):
 14 |     score: Optional[float] = None
 15 |     url: str
 16 |     title: str
 17 |     text: str
 18 |     dataset: str
 19 |     metadata: Any
 20 | 
 21 | 
 22 | class SearchRAGResponse(BaseModel):
 23 |     response: str
 24 |     related_queries: List[str]
 25 |     search_results: List[SearchResult]
 26 | 
 27 | 
 28 | class SciPhi:
 29 |     """
 30 |     Client for interacting with the SciPhi API.
 31 | 
 32 |     Attributes:
 33 |         api_base (str): Base URL for the SciPhi API.
 34 |         api_key (str): API key for authenticating requests.
 35 |         timeout (int): Timeout for API requests in seconds.
 36 |         client (httpx.Client): HTTP client for making requests.
 37 |     """
 38 | 
 39 |     def __init__(
 40 |         self,
 41 |         api_base: Optional[str] = None,
 42 |         api_key: Optional[str] = None,
 43 |         timeout: int = 30,
 44 |     ) -> None:
 45 |         """
 46 |         Initializes the SciPhi client.
 47 | 
 48 |         Args:
 49 |             api_base (Optional[str]): Base URL for the SciPhi API.
 50 |             api_key (Optional[str]): API key for authenticating requests.
 51 |             timeout (int): Timeout for API requests in seconds.
 52 | 
 53 |         Raises:
 54 |             ValueError: If `api_key` is not provided.
 55 |         """
 56 | 
 57 |         self.api_base = (
 58 |             api_base or os.getenv("SCIPHI_API_BASE") or "https://api.sciphi.ai"
 59 |         )
 60 |         self.api_key = api_key or os.getenv("SCIPHI_API_KEY")
 61 |         if not self.api_key:
 62 |             raise ValueError(
 63 |                 "You must specify an explicit api_key or define `SCIPHI_API_KEY` to initialize a SciPhi client."
 64 |             )
 65 |         self.timeout = timeout
 66 |         self.client = httpx.Client(
 67 |             base_url=self.api_base,
 68 |             headers=self._auth_headers(),
 69 |             timeout=timeout,
 70 |         )
 71 | 
 72 |     def _auth_headers(self) -> Dict[str, str]:
 73 |         """
 74 |         Generates the authorization headers for the API requests.
 75 | 
 76 |         Returns:
 77 |             Dict[str, str]: Authorization headers with bearer token.
 78 |         """
 79 | 
 80 |         return {"Authorization": f"Bearer {self.api_key}"}
 81 | 
 82 |     def _handle_api_response(self, response: httpx.Response) -> Dict:
 83 |         """
 84 |         Handles the HTTP response from the API.
 85 | 
 86 |         Args:
 87 |             response (httpx.Response): The response from the API request.
 88 | 
 89 |         Returns:
 90 |             Dict: JSON response content.
 91 | 
 92 |         Raises:
 93 |             Exception: If the response indicates an error.
 94 |         """
 95 | 
 96 |         if response.is_error:
 97 |             # Handle errors appropriately
 98 |             raise Exception(
 99 |                 f"API request failed with status {response.status_code}"
100 |             )
101 |         result = response.json()
102 |         return result
103 | 
104 |     def _handle_search_response(self, search_results: Dict[str, str]) -> None:
105 |         """
106 |         Handles dictionary search resopnses from the API.
107 | 
108 |         Args:
109 |             search_results (Dict[str, str]): The response from the API request.
110 | 
111 |         Returns:
112 |             Dict: JSON response content.
113 | 
114 |         Raises:
115 |             Exception: If the response indicates an error.
116 |         """
117 | 
118 |         for result in search_results:
119 |             if "score" in result:
120 |                 result["score"] = float(result["score"])
121 |             if "metadata" in result:
122 |                 try:
123 |                     result["metadata"] = (
124 |                         json.loads(result["metadata"])
125 |                         if (
126 |                             result["metadata"] != None
127 |                             and result["metadata"] != '""'
128 |                         )
129 |                         else {}
130 |                     )
131 |                 except Exception as e:
132 |                     result["metadata"] = dict()
133 | 
134 |     def _retry_api_request(
135 |         self, method: str, url: str, payload: Dict, max_retries: int = 3
136 |     ):
137 |         """
138 |         Common method for retrying API requests with exponential backoff.
139 | 
140 |         Args:
141 |             method (str): The HTTP method to use ('get' or 'post').
142 |             url (str): The API endpoint.
143 |             payload (Dict): The payload for the request.
144 |             max_retries (int): Maximum number of retry attempts.
145 | 
146 |         Returns:
147 |             Dict: The JSON response from the API.
148 | 
149 |         Raises:
150 |             Exception: If the maximum number of retries is reached.
151 |         """
152 |         for attempt in range(max_retries):
153 |             try:
154 |                 response = getattr(self.client, method)(url, json=payload)
155 |                 return self._handle_api_response(response)
156 | 
157 |             except httpx.HTTPError as e:
158 |                 logger.info(f"HTTP error on attempt {attempt + 1}: {e}")
159 |                 if attempt < max_retries - 1:
160 |                     time.sleep(0.5 * (2**attempt))
161 | 
162 |             except Exception as e:
163 |                 logger.error(f"Error on attempt {attempt + 1}: {e}")
164 |                 if attempt < max_retries - 1:
165 |                     time.sleep(0.5 * (2**attempt))
166 | 
167 |         raise Exception("Failed to fetch data after maximum retries.")
168 | 
169 |     def search(
170 |         self, query: str, search_provider: str, max_retries: int = 3
171 |     ) -> List[Dict]:
172 |         """
173 |         Performs a search query using the SciPhi API with retry and backoff logic.
174 | 
175 |         Args:
176 |             query (str): The search query string.
177 |             search_provider (str): The search provider to use.
178 |             max_retries (int): Maximum number of retry attempts.
179 | 
180 |         Returns:
181 |         List[Dict]: A list of search results.
182 |         """
183 |         url = f"/search"
184 |         payload = {"provider": search_provider, "query": query}
185 |         try:
186 |             handled_response = self._retry_api_request(
187 |                 "post", url, payload, max_retries
188 |             )
189 |             self._handle_search_response(handled_response)
190 |             return [SearchResult(**ele).dict() for ele in handled_response]
191 | 
192 |         except Exception as e:
193 |             logger.error(f"Search request failed: {e}")
194 |             return {"error": str(e)}
195 | 
196 |     def get_search_rag_response(
197 |         self,
198 |         query: str,
199 |         search_provider: str,
200 |         llm_model: str = "SciPhi/Sensei-7B-V1",
201 |         temperature: int = 0.2,
202 |         top_p: int = 0.95,
203 |     ):
204 |         """
205 |         Retrieves a search RAG (Retrieval-Augmented Generation) response from the API.
206 | 
207 |         Args:
208 |             query (str): The search query string.
209 |             search_provider (str): The search provider to use.
210 |             llm_model (str): The language model to use.
211 |             temperature (int): The temperature setting for the query.
212 |             top_p (int): The top-p setting for the query.
213 | 
214 |         Returns:
215 |             Dict: A dictionary with the search response and related queries.
216 |         """
217 | 
218 |         if query == "":
219 |             raise ValueError("Blank query submitted.")
220 |         if search_provider not in ["bing", "agent-search"]:
221 |             raise ValueError(f"Unsupported provider, {search_provider}")
222 | 
223 |         url = f"/search_rag"
224 |         payload = {
225 |             "query": query,
226 |             "search_provider": search_provider,
227 |             "llm_model": llm_model,
228 |             "temperature": temperature,
229 |             "top_p": top_p,
230 |         }
231 |         try:
232 |             handled_response = self._retry_api_request("post", url, payload)
233 | 
234 |             # rename the other queries to `related_queries` until LLM output is re-factored.
235 |             handled_response["related_queries"] = handled_response.pop(
236 |                 "other_queries"
237 |             )
238 | 
239 |             self._handle_search_response(handled_response["search_results"])
240 |             # Use Pydantic model for parsing and validation
241 |             search_response = SearchRAGResponse(**handled_response)
242 |         except Exception as e:
243 |             logger.error(f"Search request failed: {e}")
244 |             return {"error": str(e)}
245 | 
246 |         return search_response.dict()
247 | 
248 |     def completion(
249 |         self,
250 |         prompt: str,
251 |         llm_model_name: str = "SciPhi/Sensei-7B-V1",
252 |         llm_max_tokens_to_sample: int = 1_024,
253 |         llm_temperature: float = 0.2,
254 |         llm_top_p: float = 0.90,
255 |     ) -> SearchRAGResponse:
256 |         """
257 |         Generates a completion for a given prompt using the SciPhi API.
258 | 
259 |         Args:
260 |             prompt (str): The prompt for generating completion.
261 |             llm_model_name (str): The language model to use.
262 |             llm_max_tokens_to_sample (int): Maximum number of tokens for the sample.
263 |             llm_temperature (float): The temperature setting for the query.
264 |             llm_top_p (float): The top-p setting for the query.
265 | 
266 |         Returns:
267 |             Dict: A dictionary containing the generated completion.
268 | 
269 |         Raises:
270 |             ImportError: If the `sciphi-synthesizer` package is not installed.
271 |         """
272 | 
273 |         try:
274 |             import synthesizer
275 |         except ImportError as e:
276 |             raise ImportError(
277 |                 "Please install run `pip install sciphi-synthesizer` before attempting to generate a completion."
278 |             )
279 | 
280 |         from synthesizer.core import LLMProviderName
281 |         from synthesizer.interface import LLMInterfaceManager
282 |         from synthesizer.llm import GenerationConfig
283 | 
284 |         try:
285 |             llm_interface = LLMInterfaceManager.get_interface_from_args(
286 |                 LLMProviderName("sciphi"),
287 |             )
288 | 
289 |             generation_config = GenerationConfig(
290 |                 model_name=llm_model_name,
291 |                 max_tokens_to_sample=llm_max_tokens_to_sample,
292 |                 temperature=llm_temperature,
293 |                 top_p=llm_top_p,
294 |             )
295 | 
296 |             completion = llm_interface.get_completion(
297 |                 prompt, generation_config
298 |             ).replace("</s>", "")
299 | 
300 |             return completion
301 | 
302 |         except Exception as e:
303 |             logger.error(f"Completion generation failed: {e}")
304 |             return {"error": str(e)}
305 | 
306 |     def close(self) -> None:
307 |         """
308 |         Closes the HTTP client.
309 |         """
310 | 
311 |         self.client.close()
312 | 


--------------------------------------------------------------------------------
/agent_search/scripts/populate_qdrant_from_postgres.py:
--------------------------------------------------------------------------------
  1 | """A script to populate the database with the given dataset and subset."""
  2 | import json
  3 | import logging
  4 | import multiprocessing
  5 | import uuid
  6 | 
  7 | import fire
  8 | import numpy as np
  9 | import psycopg2
 10 | from qdrant_client import QdrantClient
 11 | from qdrant_client.http import models
 12 | 
 13 | from agent_search.core.utils import load_config
 14 | 
 15 | logger = logging.getLogger(__name__)
 16 | 
 17 | EMBEDDING_VEC_SIZE = 768
 18 | 
 19 | 
 20 | def create_collection(qclient, collection_name):
 21 |     qclient.create_collection(
 22 |         collection_name=collection_name,
 23 |         vectors_config=models.VectorParams(
 24 |             size=768, distance=models.Distance.COSINE
 25 |         ),
 26 |         quantization_config=models.ScalarQuantization(
 27 |             scalar=models.ScalarQuantizationConfig(
 28 |                 type=models.ScalarType.INT8,
 29 |                 quantile=0.99,
 30 |                 always_ram=True,
 31 |             ),
 32 |         ),
 33 |     )
 34 | 
 35 | 
 36 | def process_rows(rows, output_queue):
 37 |     """Process the rows into qdrant point objects."""
 38 |     qdrant_points = []
 39 |     for row in rows:
 40 |         _, url, __, text_chunks, embeddings_binary, ___, ____ = row
 41 |         embeddings = np.frombuffer(
 42 |             embeddings_binary, dtype=np.float32
 43 |         ).reshape(-1, EMBEDDING_VEC_SIZE)
 44 | 
 45 |         text_chunks = json.loads(text_chunks)
 46 |         # Prepare data for Qdrant
 47 |         qdrant_points.append(
 48 |             models.PointStruct(
 49 |                 id=str(uuid.uuid3(uuid.NAMESPACE_DNS, url)),
 50 |                 vector=[float(ele) for ele in embeddings[0]],
 51 |                 payload={"text": text_chunks[0], "url": url},
 52 |             )
 53 |         )
 54 | 
 55 |     output_queue.put(qdrant_points)
 56 | 
 57 | 
 58 | def qdrant_writer(config, qdrant_queue, delete_existing):
 59 |     """A writer that listens for output events in a separate thread."""
 60 |     qclient = QdrantClient(
 61 |         config["qdrant_host"],
 62 |         port=config["qdrant_grpc_port"],
 63 |         prefer_grpc=config["qdrant_prefer_grpc"],
 64 |     )
 65 |     if delete_existing:
 66 |         qclient.delete_collection(config["qdrant_collection_name"])
 67 |         create_collection(qclient, config["qdrant_collection_name"])
 68 | 
 69 |     logger.info("Launching Qdrant writer")
 70 |     while True:
 71 |         try:
 72 |             points = qdrant_queue.get()
 73 |             logger.info(f"Starting Qdrant write-out...")
 74 |             if points is None:  # Sentinel to end the process
 75 |                 break
 76 |             operation_result = qclient.upsert(
 77 |                 collection_name=config["qdrant_collection_name"],
 78 |                 wait=True,
 79 |                 points=points,
 80 |             )
 81 |             logger.info(
 82 |                 f"Finished Qdrant write-out with result {operation_result}..."
 83 |             )
 84 |         except Exception as e:
 85 |             logger.info(f"Task failed with {e}")
 86 | 
 87 | 
 88 | def process_batches(config, start, end, batch_size, output_queue):
 89 |     """Processes the batches in steps of the given batch_size"""
 90 | 
 91 |     # Connect to the database
 92 |     conn = psycopg2.connect(
 93 |         dbname=config["postgres_db"],
 94 |         user=config["postgres_user"],
 95 |         password=config["postgres_password"],
 96 |         host=config["postgres_host"],
 97 |         options="-c client_encoding=UTF8",
 98 |     )
 99 |     cur = conn.cursor()
100 |     # Declare a server-side cursor with offset
101 |     cur.execute(
102 |         f"DECLARE proc_cursor CURSOR FOR SELECT * FROM {config['postgres_table_name']} OFFSET {start} LIMIT {end - start}"
103 |     )
104 | 
105 |     offset = start
106 |     while True:
107 |         logger.info(
108 |             f"Fetching a batch of size {batch_size} at offset {offset}"
109 |         )
110 |         # Fetch a batch of rows
111 |         cur.execute(f"FETCH {batch_size} FROM proc_cursor")
112 |         rows = cur.fetchall()
113 | 
114 |         if len(rows) == 0:
115 |             break
116 | 
117 |         process_rows(rows, output_queue)
118 |         offset += batch_size
119 | 
120 |         # terminate
121 |         if offset + batch_size >= end:
122 |             break
123 | 
124 |     cur.close()
125 |     conn.close()
126 | 
127 | 
128 | class PopulateQdrant:
129 |     def __init__(self):
130 |         self.config = load_config()["agent_search"]
131 | 
132 |     def run(self, num_processes=16, batch_size=1_024, delete_existing=False):
133 |         """Runs the population process for the qdrant database"""
134 |         qdrant_queue = multiprocessing.Queue()
135 |         qdrant_writer_thread = multiprocessing.Process(
136 |             target=qdrant_writer,
137 |             args=(
138 |                 self.config,
139 |                 qdrant_queue,
140 |                 delete_existing,
141 |             ),
142 |         )
143 |         qdrant_writer_thread.start()
144 | 
145 |         conn = psycopg2.connect(
146 |             dbname=self.config["postgres_db"],
147 |             user=self.config["postgres_user"],
148 |             password=self.config["postgres_password"],
149 |             host=self.config["postgres_host"],
150 |             options="-c client_encoding=UTF8",
151 |         )
152 |         cur = conn.cursor()
153 | 
154 |         # Count total number of entries
155 |         cur.execute(
156 |             f"SELECT COUNT(*) FROM {self.config['postgres_table_name']}"
157 |         )
158 |         total_count = cur.fetchone()[0]
159 |         logger.info(
160 |             f"Processing {total_count} entries in {num_processes} processes"
161 |         )
162 | 
163 |         range_size = total_count // num_processes
164 | 
165 |         # Create and start multiprocessing workflow
166 |         processes = []
167 |         for i in range(num_processes):
168 |             logger.info(f"Starting process {i}...")
169 |             start = i * range_size
170 |             end = start + range_size if i < num_processes - 1 else total_count
171 |             proc = multiprocessing.Process(
172 |                 target=process_batches,
173 |                 args=(
174 |                     self.config,
175 |                     start,
176 |                     end,
177 |                     batch_size,
178 |                     qdrant_queue,
179 |                 ),
180 |             )
181 |             processes.append(proc)
182 |             proc.start()
183 | 
184 |         # Wait for all processes to finish
185 |         for proc in processes:
186 |             proc.join()
187 | 
188 |         # send termination signal
189 |         qdrant_queue.put(None)
190 | 
191 |         cur.close()
192 |         conn.close()
193 | 
194 | 
195 | if __name__ == "__main__":
196 |     logging.basicConfig(level=logging.INFO)
197 |     logger.setLevel(logging.INFO)
198 |     fire.Fire(PopulateQdrant)
199 | 


--------------------------------------------------------------------------------
/agent_search/scripts/run_rag.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | 
  3 | import fire
  4 | 
  5 | PROMPT = """
  6 | ### Instruction:
  7 | 
  8 | Query:
  9 | {query}
 10 | 
 11 | Search Results:
 12 | {rag_context}
 13 | 
 14 | Query:
 15 | {query}
 16 | 
 17 | ### Response:
 18 | {{"response":
 19 | """
 20 | 
 21 | PROMPT = """
 22 | ### Instruction:
 23 | 
 24 | Query:
 25 | {query}
 26 | 
 27 | Search Results:
 28 | {rag_context}
 29 | 
 30 | Query:
 31 | {query}
 32 | 
 33 | ### Response:
 34 | {{"response":
 35 | """
 36 | 
 37 | 
 38 | class RagDemo:
 39 |     """A demonstration of Bing + synthesizer RAG pipeline."""
 40 | 
 41 |     def __init__(self):
 42 |         try:
 43 |             import synthesizer
 44 |         except ImportError as e:
 45 |             raise ImportError(
 46 |                 f"Demo run_rag.py failed with {e}. Please run pip install sciphi-synthesizer before attempting to run this script."
 47 |             )
 48 | 
 49 |     def run(
 50 |         self,
 51 |         query="What is a quantum field theory in curved space time?",
 52 |         # Bing RAG provider settings
 53 |         rag_provider_name="bing",
 54 |         rag_api_base="https://api.bing.microsoft.com/v7.0/search",
 55 |         # llm parameters
 56 |         llm_provider_name="sciphi",
 57 |         llm_model_name="SciPhi/Sensei-7B-V1",
 58 |         llm_max_tokens_to_sample=1_024,
 59 |         llm_temperature=0.2,
 60 |         llm_top_p=0.90,
 61 |     ):
 62 |         from synthesizer.core import LLMProviderName, RAGProviderName
 63 |         from synthesizer.interface import (
 64 |             LLMInterfaceManager,
 65 |             RAGInterfaceManager,
 66 |         )
 67 |         from synthesizer.llm import GenerationConfig
 68 | 
 69 |         # Initialize Bing RAG Interface with its configuration
 70 |         rag_interface = RAGInterfaceManager.get_interface_from_args(
 71 |             provider_name=RAGProviderName(rag_provider_name),
 72 |             api_base=rag_api_base,
 73 |         )
 74 |         rag_result = rag_interface.get_rag_context(query)
 75 | 
 76 |         # LLM Provider Settings
 77 |         llm_interface = LLMInterfaceManager.get_interface_from_args(
 78 |             LLMProviderName(llm_provider_name),
 79 |         )
 80 | 
 81 |         generation_config = GenerationConfig(
 82 |             model_name=llm_model_name,
 83 |             max_tokens_to_sample=llm_max_tokens_to_sample,
 84 |             temperature=llm_temperature,
 85 |             top_p=llm_top_p,
 86 |             # other generation params here ...
 87 |         )
 88 | 
 89 |         formatted_prompt = PROMPT.format(
 90 |             rag_context=rag_result.context, query=query
 91 |         )
 92 |         completion = '{"response":' + llm_interface.get_completion(
 93 |             formatted_prompt, generation_config
 94 |         ).replace("</s>", "")
 95 | 
 96 |         print(
 97 |             f"Search Results:\n{rag_result.meta_data}"
 98 |             + f"\nPrompt:\n{formatted_prompt}\n\n"
 99 |             + "-" * 100
100 |             + f"\nCompletion:\n{json.loads(completion)}"
101 |         )
102 | 
103 | 
104 | if __name__ == "__main__":
105 |     fire.Fire(RagDemo)
106 | 


--------------------------------------------------------------------------------
/agent_search/scripts/run_search.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | import fire
 4 | import requests
 5 | 
 6 | from agent_search.core import AgentSearchClient
 7 | 
 8 | logger = logging.getLogger(__name__)
 9 | 
10 | 
11 | class QueryDemo:
12 |     """A wrapper class to run queries on the AgentSearchClient"""
13 | 
14 |     def __init__(self):
15 |         pass
16 | 
17 |     def run(
18 |         self,
19 |         query="What is a lagrangian?",
20 |         api_base="https://api.sciphi.ai",
21 |     ):
22 |         """Run a search with the AgentSearchClient"""
23 |         client = AgentSearchClient(api_base)
24 |         logging.basicConfig(level=logging.INFO)
25 | 
26 |         try:
27 |             results = client.search(query)
28 |             for i, result in enumerate(results):
29 |                 logging.info(
30 |                     f"{i+1}. \033[94mURL: {result.url}\033[0m (Score: \033[95m{result.score:.2f}\033[0m)"
31 |                 )
32 |                 logging.info("-" * 50)
33 |                 logging.info(f"Title: \033[93m{result.title}\033[0m")
34 |                 logging.info(f"Text:\n{result.text}\n")
35 |                 logging.info("-" * 80)
36 | 
37 |         except requests.HTTPError as e:
38 |             logging.info(f"An error occurred: {e}")
39 | 
40 | 
41 | if __name__ == "__main__":
42 |     fire.Fire(QueryDemo)
43 | 


--------------------------------------------------------------------------------
/agent_search/search/__init__.py:
--------------------------------------------------------------------------------
1 | from .base import WebSearchEngine
2 | 
3 | __all__ = ["WebSearchEngine"]
4 | 


--------------------------------------------------------------------------------
/agent_search/search/base.py:
--------------------------------------------------------------------------------
  1 | import csv
  2 | import json
  3 | import logging
  4 | import os
  5 | from typing import List
  6 | 
  7 | import numpy as np
  8 | import psycopg2
  9 | from qdrant_client import QdrantClient
 10 | from transformers import AutoModel
 11 | 
 12 | from agent_search.core import AgentSearchResult
 13 | from agent_search.core.utils import (
 14 |     cosine_similarity,
 15 |     get_data_path,
 16 |     load_config,
 17 | )
 18 | 
 19 | logger = logging.getLogger(__name__)
 20 | 
 21 | 
 22 | class WebSearchEngine:
 23 |     """A simple search client for the OpenSearch collection"""
 24 | 
 25 |     def __init__(
 26 |         self,
 27 |     ):
 28 |         try:
 29 |             import psycopg2
 30 |         except ImportError as e:
 31 |             raise ImportError(
 32 |                 f"Error {e} while imoprting psycopg2. Please install it with `pip install psycopg2` to run an WebSearchEngine instance."
 33 |             )
 34 | 
 35 |         # Load config
 36 |         self.config = load_config()["agent_search"]
 37 | 
 38 |         # Load Postgres
 39 |         logger.info(
 40 |             f"Connecting to Postgres database at: {self.config['postgres_db']}."
 41 |         )
 42 | 
 43 |         # Load qdrant client
 44 |         logger.info(
 45 |             f"Connecting to collection: {self.config['qdrant_collection_name']}"
 46 |         )
 47 |         self.qdrant_collection_name = self.config["qdrant_collection_name"]
 48 |         self.client = QdrantClient(
 49 |             self.config["qdrant_host"],
 50 |             grpc_port=self.config["qdrant_grpc_port"],
 51 |             prefer_grpc=True,
 52 |         )
 53 |         if not self.client.get_collection(self.qdrant_collection_name):
 54 |             raise ValueError(
 55 |                 f"Must have a Qdrant collection with the name {self.qdrant_collection_name}."
 56 |             )
 57 | 
 58 |         # Load embedding model
 59 |         self.embedding_model = AutoModel.from_pretrained(
 60 |             self.config["embedding_model_name"], trust_remote_code=True
 61 |         )
 62 | 
 63 |         self.pagerank_rerank_module = self.config["pagerank_rerank_module"]
 64 |         pagerank_file_path = self.config["pagerank_file_path"]
 65 |         if self.pagerank_rerank_module:
 66 |             if not pagerank_file_path:
 67 |                 # Simulating reading from a CSV file
 68 |                 pagerank_file_path = os.path.join(
 69 |                     get_data_path(), "domain_ranks.csv"
 70 |                 )
 71 | 
 72 |                 if not os.path.exists(pagerank_file_path):
 73 |                     raise ValueError(
 74 |                         "Must have a pagerank file at the config specified path when using pagerank_rerank_module"
 75 |                     )
 76 | 
 77 |             self.pagerank_importance = float(
 78 |                 self.config["pagerank_importance"]
 79 |             )
 80 |             self.domain_to_rank_map = {}
 81 | 
 82 |             with open(pagerank_file_path, newline="") as csvfile:
 83 |                 reader = csv.DictReader(csvfile)
 84 |                 for row in reader:
 85 |                     domain = row["Domain"]
 86 |                     rank = float(row["Open Page Rank"])
 87 |                     self.domain_to_rank_map[domain] = rank
 88 | 
 89 |     def get_query_vector(self, query: str):
 90 |         """Gets the query vector for the given query"""
 91 | 
 92 |         query_vector = self.embedding_model.encode(query)
 93 |         return query_vector
 94 | 
 95 |     def similarity_search(
 96 |         self,
 97 |         query_vector: np.ndarray,
 98 |         limit: int = 100,
 99 |     ):
100 |         """Searches the collection for the given query and returns the top 'limit' results"""
101 | 
102 |         points = self.client.search(
103 |             collection_name=self.qdrant_collection_name,
104 |             query_vector=query_vector,
105 |             limit=limit,
106 |         )
107 | 
108 |         results = []
109 |         for point in points:
110 |             try:
111 |                 results.append(
112 |                     AgentSearchResult(
113 |                         score=point.score,
114 |                         text=point.payload["text"],
115 |                         title=None,
116 |                         url=point.payload["url"],
117 |                         metadata={},
118 |                     )
119 |                 )
120 |             except Exception as e:
121 |                 logger.error(f"Error appending point {point} with {e}")
122 |         return results
123 | 
124 |     # Example of batch processing
125 |     def execute_batch_query(self, urls, batch_size=20):
126 |         results = []
127 |         try:
128 |             with psycopg2.connect(
129 |                 dbname=self.config["postgres_db"],
130 |                 user=self.config["postgres_user"],
131 |                 password=self.config["postgres_password"],
132 |                 host=self.config["postgres_host"],
133 |                 options="-c client_encoding=UTF8",
134 |             ) as conn:
135 |                 with conn.cursor() as cur:
136 |                     for i in range(0, len(urls), batch_size):
137 |                         batch_urls = urls[i : i + batch_size]
138 |                         logger.info(
139 |                             f"Executing batch query for URLs: {batch_urls[0:2]}"
140 |                         )
141 |                         query = f"SELECT url, title, metadata, dataset, text_chunks, embeddings FROM {self.config['postgres_table_name']} WHERE url = ANY(%s)"
142 |                         cur.execute(query, (batch_urls,))
143 |                         batch_results = cur.fetchall()
144 |                         results.extend(batch_results)
145 |         except psycopg2.DatabaseError as e:
146 |             logger.error(f"Database error: {e}")
147 |         except Exception as e:
148 |             logger.error(f"Error in execute_batch_query: {e}")
149 |         return results
150 | 
151 |     def hierarchical_similarity_reranking(
152 |         self,
153 |         query_vector: np.ndarray,
154 |         urls: List[str],
155 |         limit: int = 100,
156 |     ) -> List[AgentSearchResult]:
157 |         """Hierarchical URL search to find the most similar text chunk for the given query and URLs"""
158 |         results = self.execute_batch_query(urls)
159 |         # List to store the results along with their similarity scores
160 |         similarity_results = []
161 | 
162 |         # Iterate over each result to find the most similar text chunk
163 |         for result in results:
164 |             (
165 |                 url,
166 |                 title,
167 |                 metadata,
168 |                 dataset,
169 |                 text_chunks_str,
170 |                 embeddings_binary,
171 |             ) = result
172 |             # deserialize the embeddings and text chunks
173 |             embeddings = np.frombuffer(
174 |                 embeddings_binary, dtype=np.float32
175 |             ).reshape(-1, 768)
176 |             text_chunks = json.loads(text_chunks_str)
177 |             max_similarity = -1e9
178 |             most_similar_chunk = None
179 | 
180 |             # Iterate over each embedding to find the one with maximum cosine similarity
181 |             for chunk, embedding in zip(text_chunks, embeddings):
182 |                 similarity = cosine_similarity(
183 |                     np.array(query_vector), np.array(embedding)
184 |                 )
185 |                 if similarity > max_similarity:
186 |                     max_similarity = similarity
187 |                     most_similar_chunk = chunk
188 | 
189 |             # Store the most similar chunk and its similarity score
190 |             similarity_results.append(
191 |                 AgentSearchResult(
192 |                     score=max_similarity,
193 |                     url=url,
194 |                     title=title,
195 |                     metadata=json.loads(metadata),
196 |                     dataset=dataset,
197 |                     text=most_similar_chunk,
198 |                 ),
199 |             )
200 | 
201 |         # Sort the results based on similarity score in descending order
202 |         similarity_results.sort(key=lambda x: x.score, reverse=True)
203 |         return similarity_results[:limit]
204 | 
205 |     def pagerank_reranking(
206 |         self,
207 |         similarity_results: List[AgentSearchResult],
208 |         limit: int = 100,
209 |     ) -> List[AgentSearchResult]:
210 |         """Reranks the results based on the PageRank score of the domain"""
211 |         if not self.pagerank_rerank_module:
212 |             raise Exception(
213 |                 "PageRank reranking module is not enabled. Please set pagerank_rerank_module=True while initializing the WebSearchEngine client."
214 |             )
215 |         # List to store the results along with their PageRank scores
216 |         pagerank_results = []
217 | 
218 |         # Iterate over each result to find the PageRank score of the domain
219 |         for result in similarity_results:
220 |             pagerank_score = 0
221 |             try:
222 |                 domain = result.url.split("/")[2]
223 |                 pagerank_score = self.domain_to_rank_map.get(domain, 0)
224 |             except Exception as e:
225 |                 logger.info(f"Error {e}: Found for URL: {result.url}")
226 |             reweighted_score = (
227 |                 self.pagerank_importance * pagerank_score / 10.0
228 |                 + (1 - self.pagerank_importance) * result.score
229 |             )
230 |             pagerank_results.append(
231 |                 AgentSearchResult(
232 |                     score=reweighted_score,
233 |                     url=result.url,
234 |                     title=result.title,
235 |                     metadata=result.metadata,
236 |                     dataset=result.dataset,
237 |                     text=result.text,
238 |                 )
239 |             )
240 | 
241 |         # Sort the results based on PageRank score in descending order
242 |         pagerank_results.sort(key=lambda x: x.score, reverse=True)
243 |         return pagerank_results[:limit]
244 | 


--------------------------------------------------------------------------------
/data/config.ini:
--------------------------------------------------------------------------------
 1 | [server]
 2 | host = 0.0.0.0
 3 | port = 8000
 4 | log_level = DEBUG
 5 | 
 6 | [agent_search]
 7 | # Qdrant Settings
 8 | qdrant_host = localhost
 9 | qdrant_grpc_port = 6334
10 | qdrant_prefer_grpc = True
11 | qdrant_collection_name = agent_search_vector_index
12 | 
13 | # Postgres Settings
14 | postgres_db = root
15 | postgres_user = admin
16 | postgres_password = password
17 | postgres_host = localhost
18 | postgres_table_name = agent_search_relational_dev_2
19 | 
20 | # Embeddings Settings
21 | embedding_model_name = jinaai/jina-embeddings-v2-base-en
22 | 
23 | # PageRank Settings
24 | pagerank_rerank_module = True
25 | pagerank_importance = 0.1
26 | pagerank_file_path =


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line, and also
 5 | # from the environment for the first two.
 6 | SPHINXOPTS    ?=
 7 | SPHINXBUILD   ?= sphinx-build
 8 | SOURCEDIR     = source
 9 | BUILDDIR      = build
10 | 
11 | # Put it first so that "make" without argument is like "make help".
12 | help:
13 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
14 | 
15 | .PHONY: help Makefile
16 | 
17 | # Catch-all target: route all unknown targets to Sphinx using the new
18 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
19 | %: Makefile
20 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


--------------------------------------------------------------------------------
/docs/README.md:
--------------------------------------------------------------------------------
 1 | # AgentSearch documents
 2 | 
 3 | ## Build the docs
 4 | 
 5 | ```bash
 6 | # Install dependencies.
 7 | pip install -r requirements-docs.txt
 8 | 
 9 | # Build the docs.
10 | make clean
11 | make html
12 | ```
13 | 
14 | ## Open the docs with your browser
15 | 
16 | ```bash
17 | python -m http.server -d build/html/
18 | ```
19 | 
20 | Launch your browser and open localhost:8000.


--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
 1 | @ECHO OFF
 2 | 
 3 | pushd %~dp0
 4 | 
 5 | REM Command file for Sphinx documentation
 6 | 
 7 | if "%SPHINXBUILD%" == "" (
 8 | 	set SPHINXBUILD=sphinx-build
 9 | )
10 | set SOURCEDIR=source
11 | set BUILDDIR=build
12 | 
13 | %SPHINXBUILD% >NUL 2>NUL
14 | if errorlevel 9009 (
15 | 	echo.
16 | 	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
17 | 	echo.installed, then set the SPHINXBUILD environment variable to point
18 | 	echo.to the full path of the 'sphinx-build' executable. Alternatively you
19 | 	echo.may add the Sphinx directory to PATH.
20 | 	echo.
21 | 	echo.If you don't have Sphinx installed, grab it from
22 | 	echo.https://www.sphinx-doc.org/
23 | 	exit /b 1
24 | )
25 | 
26 | if "%1" == "" goto help
27 | 
28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
29 | goto end
30 | 
31 | :help
32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
33 | 
34 | :end
35 | popd


--------------------------------------------------------------------------------
/docs/requirements-docs.txt:
--------------------------------------------------------------------------------
1 | sphinx == 6.2.1
2 | sphinx-copybutton == 0.5.2
3 | # sphinx_rtd_dark_mode == 1.3.0
4 | sphinx-book-theme == 1.0.1


--------------------------------------------------------------------------------
/docs/source/api/main.rst:
--------------------------------------------------------------------------------
  1 | AgentSearch API Documentation
  2 | ========================
  3 | 
  4 | Welcome to the AgentSearch API documentation. Here, you'll find a detailed guide on how to use the different endpoints provided by the AgentSearch service. This API allows you to interact with the powerful functionalities of the AgentSearch codebase and associated AI models.
  5 | 
  6 | API Key and Signup
  7 | ------------------
  8 | 
  9 | To access the SciPhi API, you need an API key. If you don't possess one, you can sign up `here <https://www.sciphi.ai/signup>`_. Ensure you include the API key in your request headers as shown in the examples.
 10 | 
 11 | Endpoint Overview
 12 | -----------------
 13 | 
 14 | 1. **Search**: This endpoint allows you to fetch related search results for a given query. The results are powered by the AgentSearch framework and dataset.
 15 | 2. **Completions**: This endpoint provides completions generated by the `Sensei-7B <https://huggingface.co/SciPhi/Sensei-7B-V1/>` model, SciPhi's expert search agent.
 16 | 
 17 | Detailed Endpoint Descriptions
 18 | ------------------------------
 19 | 
 20 | Search Endpoint
 21 | ~~~~~~~~~~~~~~~
 22 | 
 23 | - **URL**: ``/search``
 24 | - **Method**: ``POST``
 25 | - **Description**: This endpoint interacts with the Retriever module of the AgentSearch-Infra codebase, allowing you to search for related documents based on the provided queries.
 26 | 
 27 | **Request Body**:
 28 |   - ``query``: A string that contains the query you wish to search for.
 29 | 
 30 | **Response**: 
 31 | A list AgentSearchResult objects. Each object contains the following fields:
 32 |   - ``score``: The ranked relevance score of the document.
 33 |   - ``url``: The URL of the document.
 34 |   - ``title``: The title of the document.
 35 |   - ``text``: The text of the document.
 36 |   - ``metadata``: A stringified JSON object containing the document's metadata.
 37 |   - ``dataset``: The name of the dataset the document belongs to.
 38 | 
 39 | **Example**:
 40 | 
 41 | .. code-block:: bash
 42 | 
 43 |    export SCIPHI_API_KEY=${MY_API_KEY}
 44 | 
 45 |    curl -X POST https://api.sciphi.ai/search \
 46 |         -H "Authorization: Bearer $SCIPHI_API_KEY" \
 47 |         -H "Content-Type: application/json" \
 48 |         -d '{"query": "What is quantum field theory in curved spacetime?"}'
 49 | 
 50 | **Response**:
 51 | 
 52 | .. code-block:: none
 53 | 
 54 |    [
 55 |     {
 56 |         "score": 0.9219069895529107,
 57 |         "url": "https://en.wikipedia.org/wiki/Quantum%20field%20theory%20in%20curved%20spacetime",
 58 |         "title": "Quantum field theory in curved spacetime",
 59 |         "dataset": "wikipedia",
 60 |         "text": "These theories rely on general relativity to describe a curved background spacetime, and define a generalized quantum field theory to describe the behavior of quantum matter within that spacetime.",
 61 |         "metadata": {},
 62 |     },
 63 |     {
 64 |         "score": 0.8924581032960278,
 65 |         "url": "https://arxiv.org/abs/1308.6773",
 66 |         "title": "Quantum field theory on curved spacetime and the standard cosmological model",
 67 |         "dataset": "arxiv",
 68 |         "text": "Algebraic quantum field theory was originally developed to understand the relation between the local degrees of freedom of quantized fields and the observed multi-particle states. It was then observed by Dimock and Kay that it provides a good starting point for formulating a theory on a curved spacetime.",
 69 |         "metadata": "{\"timestamp\": \"1995-09-29T17:38:49\", \"yymm\": \"9509\", \"arxiv_id\": \"gr-qc/9509057 ...."},
 70 |     },
 71 |     ...
 72 |    ]
 73 | 
 74 | 
 75 | Search RAG Endpoint
 76 | ~~~~~~~~~~~~~~~~~~~
 77 | 
 78 | - **URL**: ``/search_rag``
 79 | - **Method**: ``POST``
 80 | - **Description**: Retrieves a search RAG (Retrieval-Augmented Generation) response from the API.
 81 | 
 82 | **Request Body**:
 83 |   - ``query`` (str): The search query string.
 84 |   - ``search_provider`` (str): The search provider to use.
 85 |   - ``llm_model`` (str): The language model to use.
 86 |   - ``temperature`` (int): The temperature setting for the query.
 87 |   - ``top_p`` (int): The top-p setting for the query.
 88 | 
 89 | **Response**:
 90 | A dictionary with the search response and related queries.
 91 | 
 92 | **Example**:
 93 | 
 94 | .. code-block:: bash
 95 |     
 96 |     export SCIPHI_API_KEY=${MY_API_KEY}
 97 | 
 98 |     curl -X POST https://api.sciphi.ai/search_rag \
 99 |         -H "Authorization: Bearer $SCIPHI_API_KEY" \
100 |         -H "Content-Type: application/json" \
101 |         -d '{
102 |         "query": "Explain the Turing Test",
103 |         "search_provider": "bing",
104 |         "llm_model": "SciPhi/Sensei-7B-V1",
105 |         "temperature": 0.2,
106 |         "top_p": 0.95
107 |         }'
108 | 
109 | **Response**:
110 | 
111 | .. code-block:: json
112 | 
113 |     {
114 |         "response": "The Turing Test is a measure of a machine's...",
115 |         "related_queries": ["What are the origins of the Turing Test?", "How does the Turing Test work?", ...]
116 |         "search_results" : [{ ...see above... }]
117 |     }
118 | 
119 | LLM Completions Endpoint
120 | ~~~~~~~~~~~~~~~~~~~
121 | 
122 | If you would like to use the Sensei model for any purpose other than the provided Search RAG, perhaps with your own search context, then you may access the model directly. 
123 | 
124 | If you want the simplest integration with search possible, then you should rather refer to the `search_rag` endpoint above.
125 | 
126 | SciPhi adheres to the API specification of OpenAI's API, allowing compatibility with any application designed for the OpenAI API. Below is an example curl command:
127 | 
128 | **Example**:
129 | 
130 | .. code-block:: bash
131 |     
132 |     export SEARCH_CONTEXT="N/A"
133 |     export RESPONSE_PREFIX='{"response":'
134 | 
135 |     curl https://api.sciphi.ai/v1/completions \
136 |       -H "Content-Type: application/json" \
137 |       -H "Authorization: Bearer $SCIPHI_API_KEY" \
138 |       -d '{
139 |          "model": "SciPhi/Sensei-7B-V1",
140 |          "prompt": "### Instruction:\n\nQuery:\nWhat is the meaning of life?\n\nSearch Results:\n${SEARCH_CONTEXT}\n\nQuery:\nWhat is the meaning of life?\n### Response:\n${RESPONSE_PREFIX}",
141 |          "temperature": 0.0
142 |        }'
143 | 
144 | 
145 | **Response**:
146 | 
147 | .. code-block:: json
148 | 
149 |     {
150 |         "id":"cmpl-f03f53c15a174ffe89bdfc83507de7a9",
151 |         "object":"text_completion",
152 |         "created":389200,
153 |         "model":"SciPhi/Sensei-7B-V1",
154 |         "choices":[
155 |             {
156 |                 "index":0,
157 |                 "text":"The quest for the meaning of life is a profound and multifaceted in",
158 |                 "logprobs":null,
159 |                 "finish_reason":"length"
160 |             }
161 |         ],
162 |         "usage": {
163 |             "prompt_tokens":49,
164 |             "total_tokens":65,
165 |             "completion_tokens":16
166 |         }
167 |     }


--------------------------------------------------------------------------------
/docs/source/assets/logos/sciphi.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SciPhi-AI/agent-search/b47b9327f9a47d09995a09a3079b718d5ddb73c5/docs/source/assets/logos/sciphi.png


--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
 1 | # Configuration file for the Sphinx documentation builder.
 2 | #
 3 | # This file only contains a selection of the most common options. For a full
 4 | # list see the documentation:
 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html
 6 | 
 7 | # -- Path setup --------------------------------------------------------------
 8 | 
 9 | # If extensions (or modules to document with autodoc) are in another directory,
10 | # add these directories to sys.path here. If the directory is relative to the
11 | # documentation root, use os.path.abspath to make it absolute, like shown here.
12 | #
13 | # import os
14 | # import sys
15 | # sys.path.insert(0, os.path.abspath('.'))
16 | from docutils import nodes
17 | from docutils.parsers.rst import roles
18 | 
19 | # -- Project information -----------------------------------------------------
20 | 
21 | project = "AgentSearch"
22 | copyright = "2023, Emergent AGI Inc."
23 | author = "the AgentSearch Team"
24 | 
25 | 
26 | def white_role(name, rawtext, text, lineno, inliner, options={}, content=[]):
27 |     node = nodes.inline(rawtext, text, classes=["white"])
28 |     return [node], []
29 | 
30 | 
31 | def setup(app):
32 |     roles.register_local_role("white", white_role)
33 | 
34 | 
35 | # -- General configuration ---------------------------------------------------
36 | 
37 | # Add any Sphinx extension module names here, as strings. They can be
38 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
39 | # ones.
40 | extensions = [
41 |     "sphinx.ext.napoleon",
42 |     # "sphinx.ext.viewcode",
43 |     "sphinx.ext.intersphinx",
44 |     "sphinx_copybutton",
45 |     # "sphinx_rtd_dark_mode"
46 | ]
47 | 
48 | # Add any paths that contain templates here, relative to this directory.
49 | templates_path = ["_templates"]
50 | 
51 | # List of patterns, relative to source directory, that match files and
52 | # directories to ignore when looking for source files.
53 | # This pattern also affects html_static_path and html_extra_path.
54 | exclude_patterns = []
55 | 
56 | # Exclude the prompt "$" when copying code
57 | copybutton_prompt_text = r"\$ "
58 | copybutton_prompt_is_regexp = True
59 | 
60 | # -- Options for HTML output -------------------------------------------------
61 | 
62 | # The theme to use for HTML and HTML Help pages.  See the documentation for
63 | # a list of builtin themes.
64 | #
65 | html_title = project
66 | html_theme = "sphinx_book_theme"
67 | html_logo = "assets/logos/sciphi.png"
68 | html_theme_options = {
69 |     "logo_only": True,
70 |     "path_to_docs": "docs/source",
71 |     "repository_url": "https://github.com/SciPhi-AI/agent-search",
72 |     "use_repository_button": True,
73 | }
74 | 
75 | # user starts in dark mode
76 | default_dark_mode = True
77 | 
78 | # Add any paths that contain custom static files (such as style sheets) here,
79 | # relative to this directory. They are copied after the builtin static files,
80 | # so a file named "default.css" will overwrite the builtin "default.css".
81 | html_static_path = ["_static"]
82 | 


--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
  1 | Welcome to AgentSearch [ΨΦ]
  2 | ===========================
  3 | 
  4 | .. image:: https://github.com/SciPhi-AI/agent-search/assets/68796651/56268e41-130f-4d2f-ba22-b565f7642713
  5 |    :width: 716
  6 |    :alt: AgentSearch Banner
  7 |    :align: center
  8 | 
  9 | .. raw:: html
 10 | 
 11 |    <p style="text-align:center">
 12 |    </p>
 13 | 
 14 |    <p style="text-align:center">
 15 |    <script async defer src="https://buttons.github.io/buttons.js"></script>
 16 |    <a class="github-button" href="https://github.com/SciPhi-AI/agent-search" data-show-count="true" data-size="large" aria-label="Star">Star</a>
 17 |    <a class="github-button" href="https://github.com/SciPhi-AI/agent-search/subscription" data-icon="octicon-eye" data-size="large" aria-label="Watch">Watch</a>
 18 |    <a class="github-button" href="https://github.com/SciPhi-AI/agent-search/fork" data-icon="octicon-repo-forked" data-size="large" aria-label="Fork">Fork</a>
 19 |    </p>
 20 | 
 21 | 
 22 | AgentSearch is a framework for powering search agents by seamlessly integrating LLM technologies from various providers with different search engines. This integration enables search agents to perform a wide range of functions through Retrieval-Augmented Generation (RAG), including summarizing search results, generating new queries, and retrieving detailed downstream results.
 23 | 
 24 | Features of AgentSearch
 25 | -----------------------
 26 | 
 27 | - **Search Agent Integration**: Effortlessly build a search agent by connecting any search-specialized LLM, such as `Sensei-7B <https://huggingface.co/SciPhi/Sensei-7B-V1>`_, with a supported search engine.
 28 | - **Customizable Search**: Utilize the `AgentSearch dataset <https://huggingface.co/datasets/SciPhi/AgentSearch-V1>` in conjunction with this framework to deploy a customizable local search engine.
 29 | - **API Endpoint Integration**: Seamlessly integrate with a variety of hosted provider APIs for diverse search solutions, including Bing, SERP API, and AgentSearch. Additionally, support is provided for LLMs from SciPhi, HuggingFace, OpenAI, Anthropic, and more.
 30 | 
 31 | Quickstart Guide for AgentSearch
 32 | --------------------------------
 33 | 
 34 | 1. Install the AgentSearch client:
 35 | 
 36 |    .. code-block:: shell
 37 | 
 38 |       pip install agent-search
 39 | 
 40 | 2. Obtain a free API key from SciPhi:
 41 | 
 42 |    `SciPhi API Key Signup <https://www.sciphi.ai/signup>`_
 43 | 
 44 | 3. Call a pre-configured search agent endpoint:
 45 | 
 46 |    .. code-block:: python
 47 | 
 48 |       # Requires SCIPHI_API_KEY in the environment
 49 |       from agent_search import SciPhi
 50 | 
 51 |       client = SciPhi()
 52 | 
 53 |       # Search, then summarize result and generate related queries
 54 |       agent_summary = client.get_search_rag_response(query='latest news', search_provider='bing', llm_model='SciPhi/Sensei-7B-V1')
 55 |       print(agent_summary)
 56 |       # {'response': "The latest news encompasses ... and its consequences [2].", 'related_queries': ['Details on the...', ...], 'search_results' : [...]}
 57 | 
 58 | 4. Standalone searches and from the AgentSearch search engine are supported:
 59 | 
 60 |    .. code-block:: python
 61 |       
 62 |       from agent_search import SciPhi
 63 | 
 64 |       client = SciPhi()
 65 | 
 66 |       # Perform a search
 67 |       search_response = client.search(query='Quantum Field Theory', search_provider='agent-search')
 68 | 
 69 |       print(search_response)
 70 |       # [{ 'score': '.89', 'url': 'https://...', 'metadata': {...} }
 71 | 
 72 | 5. Code your own custom search agent workflow:
 73 | 
 74 |    .. code-block:: python
 75 |       
 76 |       from agent_search import SciPhi
 77 |       import json
 78 | 
 79 |       client = SciPhi()
 80 | 
 81 |       # Specify instructions for the task
 82 |       instruction = "Your task is to perform retrieval augmented generation (RAG) over the given query and search results. Return your answer in a json format that includes a summary of the search results and a list of related queries."
 83 |       query = "What is Fermat's Last Theorem?"
 84 | 
 85 |       # construct search context
 86 |       search_response = client.search(query=query, search_provider='agent-search')
 87 |       search_context = "\n\n".join(
 88 |           f"{idx + 1}. Title: {item['title']}\nURL: {item['url']}\nText: {item['text']}"
 89 |           for idx, item in enumerate(search_response)
 90 |       ).encode('utf-8')
 91 |     
 92 |       # Prefix to enforce a JSON response 
 93 |       json_response_prefix = '{"summary":'
 94 |       
 95 |       # Prepare a prompt
 96 |       formatted_prompt = f"### Instruction:{instruction}\n\nQuery:\n{query}\n\nSearch Results:\n${search_context}\n\nQuery:\n{query}\n### Response:\n{json_response_prefix}",
 97 | 
 98 |       # Generate a raw string completion with Sensei-7B-V1
 99 |       completion = json_response_prefix + client.completion(formatted_prompt, llm_model_name="SciPhi/Sensei-7B-V1")
100 | 
101 |       print(json.loads(completion))
102 |       # {
103 |       #   "summary":  "\nFermat's Last Theorem is a mathematical proposition first prop ... ",
104 |       #   "other_queries": ["The role of elliptic curves in the proof of Fermat's Last Theorem", ...]
105 |       # }
106 | 
107 | Additional Notes
108 | ----------------
109 | 
110 | - Ensure all installation commands are executed from the root directory of the AgentSearch project.
111 | - For support, join the `Discord community <https://discord.gg/mN4kWbsgRu>`
112 | 
113 | Documentation
114 | -------------
115 | 
116 | .. toctree::
117 |    :maxdepth: 2
118 |    :caption: Getting Started
119 | 
120 |    setup/installation
121 |    setup/quick_start
122 | 
123 | .. toctree::
124 |    :maxdepth: 2
125 |    :caption: API
126 | 
127 |    api/main
128 |    python_client/main
129 | 


--------------------------------------------------------------------------------
/docs/source/python_client/main.rst:
--------------------------------------------------------------------------------
  1 | SciPhi API Client Documentation
  2 | ===============================
  3 | 
  4 | Introduction
  5 | ------------
  6 | 
  7 | The SciPhi API Client is a Python library for interacting with the SciPhi API. It provides methods for performing searches, retrieving search RAG responses, generating completions, and managing client sessions.
  8 | 
  9 | 
 10 | Use and Examples
 11 | ----------------
 12 | 
 13 | The SciPhi API Client is designed to simplify interaction with the SciPhi API. It abstracts the complexities of HTTP requests and response handling, providing a convenient interface for Python developers.
 14 | 
 15 | 
 16 | Call a pre-configured search agent endpoint:
 17 | 
 18 |    .. code-block:: python
 19 | 
 20 |       # Requires SCIPHI_API_KEY in the environment
 21 |       from agent_search import SciPhi
 22 | 
 23 |       client = SciPhi()
 24 | 
 25 |       # Search, then summarize result and generate related queries
 26 |       agent_summary = client.get_search_rag_response(query='latest news', search_provider='bing', llm_model='SciPhi/Sensei-7B-V1')
 27 |       print(agent_summary)
 28 |       # {'response': "The latest news encompasses ... and its consequences [2].", 'related_queries': ['Details on the...', ...], 'search_results' : [...]}
 29 | 
 30 | Standalone searches and from the AgentSearch search engine are supported:
 31 | 
 32 |    .. code-block:: python
 33 |       
 34 |       from agent_search import SciPhi
 35 | 
 36 |       client = SciPhi()
 37 | 
 38 |       # Perform a search
 39 |       search_response = client.search(query='Quantum Field Theory', search_provider='agent-search')
 40 | 
 41 |       print(search_response)
 42 |       # [{ 'score': '.89', 'url': 'https://...', 'metadata': {...} }
 43 | 
 44 | Code your own custom search agent workflow:
 45 | 
 46 |    .. code-block:: python
 47 |       
 48 |       from agent_search import SciPhi
 49 |       import json
 50 | 
 51 |       client = SciPhi()
 52 | 
 53 |       # Specify instructions for the task
 54 |       instruction = "Your task is to perform retrieval augmented generation (RAG) over the given query and search results. Return your answer in a json format that includes a summary of the search results and a list of related queries."
 55 |       query = "What is Fermat's Last Theorem?"
 56 | 
 57 |       # construct search context
 58 |       search_response = client.search(query=query, search_provider='agent-search')
 59 |       search_context = "\n\n".join(
 60 |           f"{idx + 1}. Title: {item['title']}\nURL: {item['url']}\nText: {item['text']}"
 61 |           for idx, item in enumerate(search_response)
 62 |       ).encode('utf-8')
 63 |     
 64 |       # Prefix to enforce a JSON response 
 65 |       json_response_prefix = '{"summary":'
 66 |       
 67 |       # Prepare a prompt
 68 |       formatted_prompt = f"### Instruction:{instruction}\n\nQuery:\n{query}\n\nSearch Results:\n${search_context}\n\nQuery:\n{query}\n### Response:\n{json_response_prefix}",
 69 | 
 70 |       # Generate a raw string completion with Sensei-7B-V1
 71 |       completion = json_response_prefix + client.completion(formatted_prompt, llm_model_name="SciPhi/Sensei-7B-V1")
 72 | 
 73 |       print(json.loads(completion))
 74 |       # {
 75 |       #   "summary":  "\nFermat's Last Theorem is a mathematical proposition first prop ... ",
 76 |       #   "other_queries": ["The role of elliptic curves in the proof of Fermat's Last Theorem", ...]
 77 |       # }
 78 | 
 79 | 
 80 | 
 81 | By encapsulating the details of the API calls, the SciPhi API Client offers a user-friendly way to leverage the advanced search and AI capabilities of the SciPhi platform.
 82 | Classes and Methods
 83 | -------------------
 84 | 
 85 | .. class:: SciPhi
 86 | 
 87 |     Client for interacting with the SciPhi API.
 88 | 
 89 |     Attributes:
 90 |         api_base (str): Base URL for the SciPhi API.
 91 |         api_key (str): API key for authenticating requests.
 92 |         timeout (int): Timeout for API requests in seconds.
 93 |         client (httpx.Client): HTTP client for making requests.
 94 | 
 95 |     .. method:: search(self, query: str, search_provider: str) -> List[Dict]
 96 | 
 97 |         Performs a search query using the SciPhi API.
 98 | 
 99 |         :param query: str: The search query string.
100 |         :param search_provider: str: The search provider to use.
101 |         :return: List[Dict]: A list of search results w/ fields that correspond with `SearchResult`, specified below.
102 | 
103 |     .. method:: get_search_rag_response(self, query: str, search_provider: str, llm_model: str = "SciPhi/Sensei-7B-V1", temperature: int = 0.2, top_p: int = 0.95)
104 | 
105 |         Retrieves a search RAG (Retrieval-Augmented Generation) response from the API.
106 | 
107 |         :param query: str: The search query string.
108 |         :param search_provider: str: The search provider to use.
109 |         :param llm_model: str: The language model to use.
110 |         :param temperature: int: The temperature setting for the query.
111 |         :param top_p: int: The top-p setting for the query.
112 |         :return: str: A string containing the completed text.
113 | 
114 |     .. method:: completion(self, prompt: str, llm_model_name: str = "SciPhi/Sensei-7B-V1", llm_max_tokens_to_sample: int = 1_024, llm_temperature: float = 0.2, llm_top_p: float = 0.90) -> str
115 | 
116 |         Generates a completion string for a given prompt using the SciPhi API.
117 | 
118 |         :param prompt: str: The prompt for generating completion.
119 |         :param llm_model_name: str: The language model to use.
120 |         :param llm_max_tokens_to_sample: int: Maximum number of tokens for the sample.
121 |         :param llm_temperature: float: The temperature setting for the query.
122 |         :param llm_top_p: float: The top-p setting for the query.
123 |         :return: Dict: A dictionary containing the generated completion.
124 |         :raises ImportError: If the `sciphi-synthesizer` package is not installed.
125 | 
126 |     .. method:: close(self) -> None
127 | 
128 |         Closes the HTTP client.
129 | 
130 | 
131 | Model Classes
132 | -------------
133 | 
134 | .. class:: SearchResult
135 | 
136 |     Represents a single search result.
137 | 
138 |     .. attribute:: score
139 | 
140 |         The score of the search result.
141 |     
142 |     .. attribute:: title
143 | 
144 |         The title of the search result.
145 | 
146 |     .. attribute:: text
147 | 
148 |         The raw text of the search result.
149 | 
150 |     .. attribute:: url
151 | 
152 |         The URL of the search result.
153 | 
154 |     .. attribute:: metadata
155 | 
156 |         Optional metadata for the search result.
157 | 
158 | .. class:: SearchRAGResponse
159 | 
160 |     Represents the response from a search or RAG query.
161 | 
162 |     .. attribute:: response
163 | 
164 |         The response text.
165 | 
166 |     .. attribute:: related_queries
167 | 
168 |         A list of related queries.
169 | 
170 |     .. attribute:: search_results
171 | 
172 |         A list of SearchResult objects.
173 | 


--------------------------------------------------------------------------------
/docs/source/setup/installation.rst:
--------------------------------------------------------------------------------
 1 | .. _agent_search_installation:
 2 | 
 3 | Installation
 4 | =====================================================
 5 | 
 6 | AgentSearch is a powerful knowledge engine that integrates with multiple LLM providers and RAG providers, allowing for customizable data creation, retriever-augmented generation, and even textbook generation.
 7 | 
 8 | Requirements
 9 | ------------
10 | 
11 | - **Python**: `>=3.9,<3.12`
12 | - **Libraries**: (Please refer to the README for a detailed list)
13 | 
14 | Fast Installation with pip
15 | --------------------------
16 | 
17 | Installing AgentSearch is as simple as using pip:
18 | 
19 | .. code-block:: console
20 | 
21 |    pip install agent-search
22 | 
23 | Development Setup
24 | -----------------
25 | 
26 | To set up AgentSearch for development:
27 | 
28 | .. code-block:: console
29 | 
30 |    git clone https://github.com/SciPhi-AI/agent-search.git
31 |    cd agent-search
32 |    pip3 install -e .
33 | 
34 | Licensing and Acknowledgment
35 | ---------------------------
36 | 
37 | AgentSearch is licensed under the Apache-2.0 License.
38 | 


--------------------------------------------------------------------------------
/docs/source/setup/quick_start.rst:
--------------------------------------------------------------------------------
 1 | .. _agent_search_quick_start:
 2 | 
 3 | =====================================
 4 |  Quick Start Index for AgentSearch [ΨΦ]
 5 | =====================================
 6 | 
 7 | Introduction
 8 | ------------
 9 | 
10 | Installation and Setup
11 | ----------------------
12 | 
13 | 1. **Install AgentSearch Client**
14 | 
15 |    .. code-block:: shell
16 | 
17 |       pip install agent-search
18 | 
19 | 2. **API Key Registration**
20 | 
21 |    - Obtain a free API key from `SciPhi <https://www.sciphi.ai/signup>`_.
22 | 
23 | 3. **Optional - Local Server Requirements**
24 | 
25 |    - Ensure Docker and Postgres are installed:
26 |      - `Docker <https://www.docker.com/>`_
27 |      - `Postgres <https://www.postgresql.org/download/>`_
28 | 
29 | Using AgentSearch
30 | -----------------
31 | 
32 | 1. **Perform a Search**
33 | 
34 |    .. code-block:: shell
35 | 
36 |       export SCIPHI_API_KEY=MY_SCIPHI_API_KEY
37 |       python -m agent_search.scripts.run_search run --query="Your Search Query"
38 | 
39 | 2. **Generate a RAG Response**
40 | 
41 |    .. code-block:: shell
42 | 
43 |       export SCIPHI_API_KEY=MY_SCIPHI_API_KEY
44 |       # Use the SciPhi `SearchAgent` for LLM RAG w/ AgentSearch
45 |       python -m agent_search.scripts.run_rag run --query="What is Fermat's last theorem?"
46 |       # ... Output ...
47 |       # {"response": "\nFermat's Last Theorem is a significant result in number theory, stating that for any natural number n greater than 2, there are no solutions to the equation \\(a^n + b^n = c^n\\) where \\(a\\), \\(b\\), and \\(c\\) are positive integers [5]. The theorem was first proposed by Pierre de Fermat in the margins of his copy of Diophantus's \"Arithmetica\" in the 17th century, but it remained unproved for over three centuries [8]. The first case of the theorem to be proven was by Fermat himself for \\(n = 4\\), using a method of infinite descent [9]. Leonhard Euler later provided a proof for the case \\(n = 3\\), although his initial proof contained errors that were later corrected [9].\n\nThe theorem was finally proven in its entirety in 1995 by British mathematician Andrew Wiles, using sophisticated mathematical tools and techniques that were not available during Fermat's lifetime [10]. This breakthrough marked the end of a long period of mathematical speculation and the resolution of a major historical puzzle in mathematics [10]. The proof of Fermat's Last Theorem has been hailed as one of the most significant achievements in the history of mathematics, demonstrating the power of modern mathematical methods and the persistence of mathematical inquiry over centuries [10].\n\n", "other_queries": ["Details of Fermat's Last Theorem proof", "Historical impact of Fermat's Last Theorem", "Contributions of Andrew Wiles to mathematics", "Techniques used in the proof of Fermat's Last Theorem", "Evolution of number theory post-Fermat's Last Theorem"]}</s>
48 | 
49 |       export SCIPHI_API_KEY=MY_SCIPHI_API_KEY
50 |       export OPENAI_API_KEY=MY_OPENAI_KEY
51 |       # Use OpenAI `gpt-3.5-turbo` for LLM generation
52 |       python -m agent_search.scripts.run_rag run --query="What is Fermat's last theorem?" --llm_provider_name=openai --llm_model_name=gpt-3.5-turbo
53 | 
54 | Local Setup and Initialization
55 | ------------------------------
56 | 
57 | 1. **Launch Postgres Database**
58 | 
59 |    .. code-block:: shell
60 | 
61 |       sudo service postgresql start
62 | 
63 | 2. **Populate Postgres Database**
64 | 
65 |    .. code-block:: shell
66 | 
67 |       python -m agent_search.scripts.populate_postgres_from_hf run
68 | 
69 | 3. **Start Qdrant Service with Docker**
70 | 
71 |    .. code-block:: shell
72 | 
73 |       docker run -p 6333:6333 -p 6334:6334 -v $(pwd)/qdrant_storage:/qdrant/storage:z qdrant/qdrant
74 | 
75 | 4. **Populate Vector Database (Qdrant)**
76 | 
77 |    .. code-block:: shell
78 | 
79 |       python -m agent_search.scripts.populate_qdrant_from_postgres run --delete_existing=True
80 | 
81 | 5. **Run the Server**
82 | 
83 |    .. code-block:: shell
84 | 
85 |       python -m agent_search.app.server
86 | 
87 | Additional Notes
88 | ----------------
89 | 
90 | - Execute all commands from the root directory of the AgentSearch project.
91 | - Customize the `query` parameter to suit your search requirements.
92 | 
93 | Documentation Links
94 | -------------------
95 | 
96 | - `Installation Guide <installation.html>`_ 
97 | - `Quick Start Tutorial <quick_start.html>`_
98 | - `API Documentation <../api/main.html>`_
99 | 


--------------------------------------------------------------------------------
/examples/recursive_agent_search.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from agent_search import SciPhi
 4 | 
 5 | client = SciPhi(api_key=os.environ.get("SCIPHI_API_KEY"))
 6 | 
 7 | breadth = 1
 8 | depth = 3
 9 | query = "Conservatism"
10 | 
11 | 
12 | # Generate a completion
13 | def generate_answer(query):
14 |     completion = client.get_search_rag_response(
15 |         query=query, search_provider="agent-search"
16 |     )
17 |     return completion
18 | 
19 | 
20 | def recursive_search(query, depth, breadth):
21 |     initial_completion = generate_answer(query)
22 |     responses = [
23 |         (
24 |             initial_completion["response"],
25 |             initial_completion["related_queries"][:breadth],
26 |         )
27 |     ]
28 | 
29 |     for _ in range(depth - 1):
30 |         new_related_queries = []
31 |         for item in responses[-1][1]:
32 |             further_completion = generate_answer(item)
33 |             if (
34 |                 isinstance(further_completion, dict)
35 |                 and "response" in further_completion
36 |             ):
37 |                 new_related = further_completion["related_queries"][:breadth]
38 |                 responses.append((further_completion["response"], new_related))
39 |                 new_related_queries.extend(new_related)
40 |             else:
41 |                 print(
42 |                     f"Unexpected format for further_completion: {further_completion}"
43 |                 )
44 |         responses[-1] = (responses[-1][0], new_related_queries)
45 | 
46 |     for response, related in responses:
47 |         print(response)
48 |         print("*" * 10)
49 |         print("Related queries:", related)
50 |         print("*" * 10)
51 | 
52 | 
53 | # Run 3 iterations deep and use 1 related query per iteration
54 | recursive_search(
55 |     query=query, depth=depth, breadth=breadth
56 | )  # Specify depth (3) and breadth (1) here
57 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["poetry-core", "setuptools", "wheel"]
 3 | build-backend = "poetry.core.masonry.api"
 4 | 
 5 | [tool.poetry]
 6 | name = "agent_search"
 7 | version = "0.1.0"
 8 | description = "AgentSearch: An open source framework and dataset for webscale local search."
 9 | authors = ["Owen Colegrove <owen@sciphi.ai>"]
10 | license = "Apache-2.0"
11 | readme = "README.md"
12 | 
13 | [tool.poetry.dependencies]
14 | # python version
15 | python = ">=3.9,<3.12"
16 | 
17 | # package dependencies
18 | configparser = "^5.0.0"
19 | fire = "^0.5.0"
20 | numpy = "^1.25.2"
21 | pydantic = "^1.10.13"
22 | qdrant_client = "^1.7.0"
23 | requests = "^2.31.0"
24 | sciphi_synthesizer="1.0.5"
25 | transformers = "^4.33.1"
26 | openai = "0.27.8"
27 | 
28 | # Additional Requirements
29 | # torch, fastapi, uvicorn, psycogp2
30 | 
31 | [tool.poetry.group.dev.dependencies]
32 | black = "^23.3.0"
33 | flake8 = "6.1.0"
34 | isort = "5.12.0"
35 | pre-commit = "^3.3.3"
36 | mypy = "^1.5.1"
37 | types-requests = "^2.31.0.2"
38 | types-attrs = "^19.1.0"
39 | 
40 | [tool.black]
41 | line-length = 79
42 | 
43 | [tool.mypy]
44 | ignore_missing_imports = true
45 | exclude = 'playground/.*|deprecated/.*|dump/.*|docs/source'
46 | 
47 | [[tool.mypy.overrides]]
48 | module = "yaml"
49 | ignore_missing_imports = true
50 | 


--------------------------------------------------------------------------------