├── .github └── workflows │ ├── python-package.yml │ └── python-publish.yml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── README.md ├── poetry.lock ├── praisonai_tools ├── __init__.py ├── adapters │ ├── embedchain_adapter.py │ └── lancedb_adapter.py └── tools │ ├── __init__.py │ ├── base_tool.py │ ├── browserbase_load_tool │ ├── README.md │ └── browserbase_load_tool.py │ ├── code_docs_search_tool │ ├── README.md │ └── code_docs_search_tool.py │ ├── csv_search_tool │ ├── README.md │ └── csv_search_tool.py │ ├── directory_read_tool │ ├── README.md │ └── directory_read_tool.py │ ├── directory_search_tool │ ├── README.md │ └── directory_search_tool.py │ ├── docx_search_tool │ ├── README.md │ └── docx_search_tool.py │ ├── exa_tools │ ├── README.md │ ├── exa_base_tool.py │ └── exa_search_tool.py │ ├── file_read_tool │ ├── README.md │ └── file_read_tool.py │ ├── github_search_tool │ ├── README.md │ └── github_search_tool.py │ ├── json_search_tool │ ├── README.md │ └── json_search_tool.py │ ├── llamaindex_tool │ ├── README.md │ └── llamaindex_tool.py │ ├── mdx_seach_tool │ ├── README.md │ └── mdx_search_tool.py │ ├── pdf_search_tool │ ├── README.md │ └── pdf_search_tool.py │ ├── pdf_text_writing_tool │ └── pdf_text_writing_tool.py │ ├── pg_seach_tool │ ├── README.md │ └── pg_search_tool.py │ ├── rag │ ├── README.md │ ├── __init__.py │ └── rag_tool.py │ ├── scrape_element_from_website │ └── scrape_element_from_website.py │ ├── scrape_website_tool │ ├── README.md │ └── scrape_website_tool.py │ ├── selenium_scraping_tool │ ├── README.md │ └── selenium_scraping_tool.py │ ├── serper_dev_tool │ ├── README.md │ └── serper_dev_tool.py │ ├── txt_search_tool │ ├── README.md │ └── txt_search_tool.py │ ├── website_search │ ├── README.md │ └── website_search_tool.py │ ├── xml_search_tool │ ├── README.md │ └── xml_search_tool.py │ ├── youtube_channel_search_tool │ ├── README.md │ └── youtube_channel_search_tool.py │ └── youtube_video_search_tool │ ├── README.md │ └── 
youtube_video_search_tool.py ├── pyproject.toml ├── tests ├── base_tool_test.py ├── conftest.py └── tools │ └── rag │ └── rag_tool_test.py └── uv.lock /.github/workflows/python-package.yml: -------------------------------------------------------------------------------- 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python 3 | 4 | name: Python package 5 | 6 | on: 7 | push: 8 | branches: [ "develop" ] 9 | pull_request: 10 | branches: [ "develop" ] 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | python-version: ["3.10", "3.11"] 20 | 21 | steps: 22 | - uses: actions/checkout@v3 23 | - name: Set up Python ${{ matrix.python-version }} 24 | uses: actions/setup-python@v3 25 | with: 26 | python-version: ${{ matrix.python-version }} 27 | - name: Install dependencies 28 | run: | 29 | python -m pip install --upgrade pip 30 | python -m pip install flake8 pytest 31 | if [ -f requirements.txt ]; then pip install -r requirements.txt; fi -------------------------------------------------------------------------------- /.github/workflows/python-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will upload a Python Package using Twine when a release is created 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries 3 | 4 | # This workflow uses actions that are not certified by GitHub. 5 | # They are provided by a third-party and are governed by 6 | # separate terms of service, privacy policy, and support 7 | # documentation. 
8 | 9 | name: Upload Python Package 10 | 11 | on: 12 | release: 13 | types: [published] 14 | 15 | permissions: 16 | contents: read 17 | 18 | jobs: 19 | deploy: 20 | 21 | runs-on: ubuntu-latest 22 | 23 | steps: 24 | - uses: actions/checkout@v3 25 | - name: Set up Python 26 | uses: actions/setup-python@v3 27 | with: 28 | python-version: '3.x' 29 | - name: Install dependencies 30 | run: | 31 | python -m pip install --upgrade pip 32 | pip install build 33 | - name: Build package 34 | run: python -m build 35 | - name: Publish package 36 | uses: pypa/gh-action-pypi-publish@v1.9.0 37 | with: 38 | user: __token__ 39 | password: ${{ secrets.PYPI_API_TOKEN }} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | venv 2 | .DS_Store 3 | .pytest_cache 4 | __pycache__ 5 | dist/ 6 | .env 7 | .idea 8 | test.py 9 | chroma.sqlite3 10 | .vscode 11 | praisonai 12 | .cache 13 | __pycache__ 14 | chroma.sqlite3 15 | test/ 16 | .env 17 | assets/* 18 | .idea 19 | .DS_Store 20 | .pytest_cache 21 | praisonAI.egg-info 22 | flagged 23 | test.yaml 24 | db 25 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | 3 | - repo: https://github.com/psf/black-pre-commit-mirror 4 | rev: 23.12.1 5 | hooks: 6 | - id: black 7 | language_version: python3.11 8 | files: \.(py)$ 9 | 10 | - repo: https://github.com/pycqa/isort 11 | rev: 5.13.2 12 | hooks: 13 | - id: isort 14 | name: isort (python) 15 | args: ["--profile", "black", "--filter-files"] 16 | 17 | - repo: https://github.com/PyCQA/autoflake 18 | rev: v2.2.1 19 | hooks: 20 | - id: autoflake 21 | args: ['--in-place', '--remove-all-unused-imports', '--remove-unused-variables', '--ignore-init-module-imports'] 22 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 João Moura 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
# praisonai_tools/adapters/embedchain_adapter.py
from typing import Any

from embedchain import App

from praisonai_tools.tools.rag.rag_tool import Adapter


class EmbedchainAdapter(Adapter):
    """Adapter that backs a RagTool with an Embedchain application.

    When ``summarize`` is True, queries return Embedchain's synthesized
    answer; otherwise they return the raw retrieved source snippets.
    """

    embedchain_app: App
    summarize: bool = False

    def query(self, question: str) -> str:
        """Run *question* against the Embedchain app and return text.

        Retrieval happens as a dry run unless summarization was requested,
        in which case the LLM-generated answer is returned instead of the
        concatenated source snippets.
        """
        answer, citations = self.embedchain_app.query(
            question, citations=True, dry_run=(not self.summarize)
        )
        if self.summarize:
            return answer
        snippets = (citation[0] for citation in citations)
        return "\n\n".join(snippets)

    def add(
        self,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """Forward *args*/*kwargs* to ``App.add`` to index new content."""
        self.embedchain_app.add(*args, **kwargs)
# praisonai_tools/adapters/lancedb_adapter.py
from pathlib import Path
from typing import Any, Callable

from lancedb import DBConnection as LanceDBConnection
from lancedb import connect as lancedb_connect
from lancedb.table import Table as LanceDBTable
from openai import Client as OpenAIClient
from pydantic import Field, PrivateAttr

from praisonai_tools.tools.rag.rag_tool import Adapter


def _default_embedding_function():
    """Build an embedding callable backed by OpenAI's ada-002 model."""
    client = OpenAIClient()

    def _embed(input):
        response = client.embeddings.create(
            input=input, model="text-embedding-ada-002"
        )
        return [item.embedding for item in response.data]

    return _embed


class LanceDBAdapter(Adapter):
    """Adapter that backs a RagTool with a LanceDB vector table."""

    uri: str | Path
    table_name: str
    embedding_function: Callable = Field(default_factory=_default_embedding_function)
    top_k: int = 3
    vector_column_name: str = "vector"
    text_column_name: str = "text"

    _db: LanceDBConnection = PrivateAttr()
    _table: LanceDBTable = PrivateAttr()

    def model_post_init(self, __context: Any) -> None:
        # Open the connection and table once pydantic has populated fields.
        self._db = lancedb_connect(self.uri)
        self._table = self._db.open_table(self.table_name)

        super().model_post_init(__context)

    def query(self, question: str) -> str:
        """Embed *question*, run a vector search, and join the top hits."""
        question_vector = self.embedding_function([question])[0]
        search = self._table.search(
            question_vector, vector_column_name=self.vector_column_name
        )
        rows = search.limit(self.top_k).select([self.text_column_name]).to_list()
        return "\n".join(row[self.text_column_name] for row in rows)

    def add(
        self,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """Forward *args*/*kwargs* to ``Table.add`` to insert new rows."""
        self._table.add(*args, **kwargs)
-------------------------------------------------------------------------------- 1 | from .browserbase_load_tool.browserbase_load_tool import BrowserbaseLoadTool 2 | from .code_docs_search_tool.code_docs_search_tool import CodeDocsSearchTool 3 | from .csv_search_tool.csv_search_tool import CSVSearchTool 4 | from .directory_search_tool.directory_search_tool import DirectorySearchTool 5 | from .directory_read_tool.directory_read_tool import DirectoryReadTool 6 | from .docx_search_tool.docx_search_tool import DOCXSearchTool 7 | from .exa_tools.exa_search_tool import EXASearchTool 8 | from .file_read_tool.file_read_tool import FileReadTool 9 | from .github_search_tool.github_search_tool import GithubSearchTool 10 | from .serper_dev_tool.serper_dev_tool import SerperDevTool 11 | from .txt_search_tool.txt_search_tool import TXTSearchTool 12 | from .json_search_tool.json_search_tool import JSONSearchTool 13 | from .mdx_seach_tool.mdx_search_tool import MDXSearchTool 14 | from .pdf_search_tool.pdf_search_tool import PDFSearchTool 15 | from .pg_seach_tool.pg_search_tool import PGSearchTool 16 | from .rag.rag_tool import RagTool 17 | from .scrape_element_from_website.scrape_element_from_website import ScrapeElementFromWebsiteTool 18 | from .scrape_website_tool.scrape_website_tool import ScrapeWebsiteTool 19 | from .selenium_scraping_tool.selenium_scraping_tool import SeleniumScrapingTool 20 | from .website_search.website_search_tool import WebsiteSearchTool 21 | from .xml_search_tool.xml_search_tool import XMLSearchTool 22 | from .youtube_channel_search_tool.youtube_channel_search_tool import YoutubeChannelSearchTool 23 | from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool 24 | from .llamaindex_tool.llamaindex_tool import LlamaIndexTool 25 | -------------------------------------------------------------------------------- /praisonai_tools/tools/base_tool.py: -------------------------------------------------------------------------------- 1 
# praisonai_tools/tools/base_tool.py
from abc import ABC, abstractmethod
from typing import Any, Callable, Optional, Type

from langchain_core.tools import StructuredTool
from pydantic import BaseModel, ConfigDict, Field, validator
from pydantic.v1 import BaseModel as V1BaseModel


def _schema_from_annotations(class_name: str, annotations: dict) -> Type[V1BaseModel]:
    """Build a pydantic v1 model type from a callable's annotations.

    The ``return`` annotation is dropped; every remaining annotation becomes
    a field.  Centralized here because three call sites previously repeated
    this ``type(...)`` construction verbatim.
    """
    return type(
        class_name,
        (V1BaseModel,),
        {
            "__annotations__": {
                k: v for k, v in annotations.items() if k != "return"
            },
        },
    )


class BaseTool(BaseModel, ABC):
    """Abstract base class for agent tools.

    Subclasses implement ``_run``; when ``args_schema`` is not supplied
    explicitly it is derived from ``_run``'s type annotations.
    """

    class _ArgsSchemaPlaceholder(V1BaseModel):
        # Sentinel default instance; replaced by the validator below.
        pass

    model_config = ConfigDict()

    name: str
    """The unique name of the tool that clearly communicates its purpose."""
    description: str
    """Used to tell the model how/when/why to use the tool."""
    args_schema: Type[V1BaseModel] = Field(default_factory=_ArgsSchemaPlaceholder)
    """The schema for the arguments that the tool accepts."""
    description_updated: bool = False
    """Flag to check if the description has been updated."""
    cache_function: Optional[Callable] = lambda _args, _result: True
    """Called with (args, result) to decide whether a result should be
    cached; the default caches every result."""

    @validator("args_schema", always=True, pre=True)
    def _default_args_schema(cls, v: Type[V1BaseModel]) -> Type[V1BaseModel]:
        # An explicitly provided schema passes through untouched; the
        # placeholder instance is swapped for a schema derived from _run.
        if not isinstance(v, cls._ArgsSchemaPlaceholder):
            return v

        return _schema_from_annotations(
            f"{cls.__name__}Schema", cls._run.__annotations__
        )

    def model_post_init(self, __context: Any) -> None:
        self._generate_description()

        super().model_post_init(__context)

    def run(
        self,
        *args: Any,
        **kwargs: Any,
    ) -> Any:
        """Public entry point: announce the tool, then delegate to ``_run``."""
        print(f"Using Tool: {self.name}")
        return self._run(*args, **kwargs)

    @abstractmethod
    def _run(
        self,
        *args: Any,
        **kwargs: Any,
    ) -> Any:
        """Here goes the actual implementation of the tool."""

    def to_langchain(self) -> StructuredTool:
        """Expose this tool as a LangChain ``StructuredTool``."""
        self._set_args_schema()
        return StructuredTool(
            name=self.name,
            description=self.description,
            args_schema=self.args_schema,
            func=self._run,
        )

    def _set_args_schema(self):
        # Defensive rebuild for the case where args_schema was forced to None
        # after construction; normally the field validator guarantees a type.
        if self.args_schema is None:
            self.args_schema = _schema_from_annotations(
                f"{self.__class__.__name__}Schema", self._run.__annotations__
            )

    def _generate_description(self):
        """Prefix the description with a signature-like argument summary."""
        args = []
        for arg, attribute in self.args_schema.schema()["properties"].items():
            if "type" in attribute:
                args.append(f"{arg}: '{attribute['type']}'")

        description = self.description.replace("\n", " ")
        self.description = f"{self.name}({', '.join(args)}) - {description}"


class Tool(BaseTool):
    func: Callable
    """The function that will be executed when the tool is called."""

    def _run(self, *args: Any, **kwargs: Any) -> Any:
        return self.func(*args, **kwargs)


def to_langchain(
    tools: list[BaseTool | StructuredTool],
) -> list[StructuredTool]:
    """Convert BaseTool entries to StructuredTool; pass others through."""
    return [t.to_langchain() if isinstance(t, BaseTool) else t for t in tools]


def tool(*args):
    """
    Decorator to create a tool from a function.

    Usable bare (``@tool``) or with an explicit name (``@tool("My Tool")``).
    Raises ``ValueError`` if the function lacks a docstring.
    """

    def _make_with_name(tool_name: str) -> Callable:
        def _make_tool(f: Callable) -> BaseTool:
            if f.__doc__ is None:
                raise ValueError("Function must have a docstring")
            # NOTE: the historical `f.__annotations__ is None` check was dead
            # code -- __annotations__ is always a dict.  An un-annotated
            # function simply yields a parameterless schema, as before.

            class_name = "".join(tool_name.split()).title()
            args_schema = _schema_from_annotations(class_name, f.__annotations__)

            return Tool(
                name=tool_name,
                description=f.__doc__,
                func=f,
                args_schema=args_schema,
            )

        return _make_tool

    if len(args) == 1 and callable(args[0]):
        return _make_with_name(args[0].__name__)(args[0])
    if len(args) == 1 and isinstance(args[0], str):
        return _make_with_name(args[0])
    raise ValueError("Invalid arguments")
# praisonai_tools/tools/browserbase_load_tool/browserbase_load_tool.py
from typing import Optional, Any, Type
from pydantic.v1 import BaseModel, Field
from praisonai_tools.tools.base_tool import BaseTool


class BrowserbaseLoadToolSchema(BaseModel):
    url: str = Field(description="Website URL")


class BrowserbaseLoadTool(BaseTool):
    """Load a web page in a Browserbase headless browser and return its contents."""

    name: str = "Browserbase web load tool"
    description: str = "Load webpages url in a headless browser using Browserbase and return the contents"
    args_schema: Type[BaseModel] = BrowserbaseLoadToolSchema
    api_key: Optional[str] = None
    text_content: Optional[bool] = False
    browserbase: Optional[Any] = None

    def __init__(self, api_key: Optional[str] = None, text_content: Optional[bool] = False, **kwargs):
        super().__init__(**kwargs)
        self.browserbase = self._connect(api_key)
        self.text_content = text_content

    @staticmethod
    def _connect(api_key: Optional[str]):
        # Imported lazily so the browserbase dependency stays optional.
        try:
            from browserbase import Browserbase  # type: ignore
        except ImportError:
            raise ImportError(
                "`browserbase` package not found, please run `pip install browserbase`"
            )

        return Browserbase(api_key=api_key)

    def _run(self, url: str):
        """Fetch *url* via Browserbase, as readable text when configured."""
        return self.browserbase.load_url(url, text_content=self.text_content)
# praisonai_tools/tools/code_docs_search_tool/code_docs_search_tool.py
from typing import Any, Optional, Type

from embedchain.models.data_type import DataType
from pydantic.v1 import BaseModel, Field

from ..rag.rag_tool import RagTool


class FixedCodeDocsSearchToolSchema(BaseModel):
    """Input for CodeDocsSearchTool."""

    search_query: str = Field(
        ...,
        description="Mandatory search query you want to use to search the Code Docs content",
    )


class CodeDocsSearchToolSchema(FixedCodeDocsSearchToolSchema):
    """Input for CodeDocsSearchTool."""

    docs_url: str = Field(..., description="Mandatory docs_url path you want to search")


class CodeDocsSearchTool(RagTool):
    """RAG tool that semantically searches code-documentation sites."""

    name: str = "Search a Code Docs content"
    description: str = (
        "A tool that can be used to semantic search a query from a Code Docs content."
    )
    args_schema: Type[BaseModel] = CodeDocsSearchToolSchema

    def __init__(self, docs_url: Optional[str] = None, **kwargs):
        """Optionally pre-index *docs_url*, fixing the tool to that site."""
        super().__init__(**kwargs)
        if docs_url is not None:
            self.add(docs_url)
            # Fix: description previously read "a query the {docs_url}".
            self.description = f"A tool that can be used to semantic search a query from the {docs_url} Code Docs content."
            # Once the site is pinned, docs_url is no longer a runtime arg.
            self.args_schema = FixedCodeDocsSearchToolSchema
            self._generate_description()

    def add(
        self,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """Index a documentation site; force the DOCS_SITE data type."""
        kwargs["data_type"] = DataType.DOCS_SITE
        super().add(*args, **kwargs)

    def _before_run(
        self,
        query: str,
        **kwargs: Any,
    ) -> Any:
        # A docs_url supplied at call time is indexed on the fly.
        if "docs_url" in kwargs:
            self.add(kwargs["docs_url"])

    def _run(
        self,
        search_query: str,
        **kwargs: Any,
    ) -> Any:
        return super()._run(query=search_query)
# praisonai_tools/tools/csv_search_tool/csv_search_tool.py
from typing import Any, Optional, Type

from embedchain.models.data_type import DataType
from pydantic.v1 import BaseModel, Field

from ..rag.rag_tool import RagTool


class FixedCSVSearchToolSchema(BaseModel):
    """Input for CSVSearchTool."""

    search_query: str = Field(
        ...,
        description="Mandatory search query you want to use to search the CSV's content",
    )


class CSVSearchToolSchema(FixedCSVSearchToolSchema):
    """Input for CSVSearchTool."""

    csv: str = Field(..., description="Mandatory csv path you want to search")


class CSVSearchTool(RagTool):
    """RAG tool that semantically searches the contents of a CSV file."""

    name: str = "Search a CSV's content"
    description: str = (
        "A tool that can be used to semantic search a query from a CSV's content."
    )
    args_schema: Type[BaseModel] = CSVSearchToolSchema

    def __init__(self, csv: Optional[str] = None, **kwargs):
        """Optionally pre-index *csv*, fixing the tool to that file."""
        super().__init__(**kwargs)
        if csv is not None:
            self.add(csv)
            # Fix: description previously read "a query the {csv}".
            self.description = f"A tool that can be used to semantic search a query from the {csv} CSV's content."
            # Once the file is pinned, csv is no longer a runtime arg.
            self.args_schema = FixedCSVSearchToolSchema
            self._generate_description()

    def add(
        self,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """Index a CSV file; force the CSV data type."""
        kwargs["data_type"] = DataType.CSV
        super().add(*args, **kwargs)

    def _before_run(
        self,
        query: str,
        **kwargs: Any,
    ) -> Any:
        # A csv path supplied at call time is indexed on the fly.
        if "csv" in kwargs:
            self.add(kwargs["csv"])

    def _run(
        self,
        search_query: str,
        **kwargs: Any,
    ) -> Any:
        return super()._run(query=search_query)
If you haven't added this package to your environment, you can easily install it with pip using the following command: 9 | 10 | ```shell 11 | pip install 'praisonai[tools]' 12 | ``` 13 | 14 | This installs the latest version of the `praisonai_tools` package, allowing access to the DirectoryReadTool and other utilities. 15 | 16 | ## Example 17 | The DirectoryReadTool is simple to use. The code snippet below shows how to set up and use the tool to list the contents of a specified directory: 18 | 19 | ```python 20 | from praisonai_tools import DirectoryReadTool 21 | 22 | # Initialize the tool with the directory you want to explore 23 | tool = DirectoryReadTool(directory='/path/to/your/directory') 24 | 25 | # Use the tool to list the contents of the specified directory 26 | directory_contents = tool.run() 27 | print(directory_contents) 28 | ``` 29 | 30 | This example demonstrates the essential steps to utilize the DirectoryReadTool effectively, highlighting its simplicity and user-friendly design. 31 | 32 | ## Arguments 33 | The DirectoryReadTool requires minimal configuration for use. The essential argument for this tool is as follows: 34 | 35 | - `directory`: A mandatory argument that specifies the path to the directory whose contents you wish to list. It accepts both absolute and relative paths, guiding the tool to the desired directory for content listing. 36 | 37 | The DirectoryReadTool provides a user-friendly and efficient way to list directory contents, making it an invaluable tool for managing and inspecting directory structures. 38 | ``` 39 | 40 | This revised documentation for the DirectoryReadTool maintains the structure and content requirements as outlined, with adjustments made for clarity, consistency, and adherence to the high-quality standards exemplified in the provided documentation example. 
# praisonai_tools/tools/directory_read_tool/directory_read_tool.py
import os
from typing import Optional, Type, Any
from pydantic.v1 import BaseModel, Field
from ..base_tool import BaseTool


class FixedDirectoryReadToolSchema(BaseModel):
    """Input for DirectoryReadTool."""
    pass


class DirectoryReadToolSchema(FixedDirectoryReadToolSchema):
    """Input for DirectoryReadTool."""
    directory: str = Field(..., description="Mandatory directory to list content")


class DirectoryReadTool(BaseTool):
    """Recursively list every file underneath a directory."""

    name: str = "List files in directory"
    description: str = "A tool that can be used to recursively list a directory's content."
    args_schema: Type[BaseModel] = DirectoryReadToolSchema
    directory: Optional[str] = None

    def __init__(self, directory: Optional[str] = None, **kwargs):
        """Optionally pin the tool to *directory* at construction time."""
        super().__init__(**kwargs)
        if directory is not None:
            self.directory = directory
            self.description = f"A tool that can be used to list {directory}'s content."
            # Once the directory is pinned, it is no longer a runtime arg.
            self.args_schema = FixedDirectoryReadToolSchema
            self._generate_description()

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:
        """Return a bullet list of every file path under the directory.

        Raises:
            ValueError: if no directory was given at init or call time
                (previously this crashed with an opaque TypeError on
                ``None[-1]``).
        """
        directory = kwargs.get('directory', self.directory)
        if not directory:
            raise ValueError(
                "A directory must be provided, either when the tool is "
                "created or as the 'directory' argument at runtime."
            )
        directory = directory.rstrip("/") or "/"
        # Fix: the old code stripped the prefix with str.replace(directory, ''),
        # which replaces *every* occurrence and corrupted any path where the
        # directory string reappeared deeper in the tree.  relpath strips
        # exactly the leading prefix.
        files_list = [
            f"{directory}/{os.path.relpath(os.path.join(root, filename), directory)}"
            for root, _dirs, files in os.walk(directory)
            for filename in files
        ]
        files = "\n- ".join(files_list)
        return f"File paths: \n-{files}"
Execute the following command in your terminal: 8 | 9 | ```shell 10 | pip install 'praisonai[tools]' 11 | ``` 12 | 13 | ## Example 14 | The following examples demonstrate how to initialize the DirectorySearchTool for different use cases and how to perform a search: 15 | 16 | ```python 17 | from praisonai_tools import DirectorySearchTool 18 | 19 | # To enable searching within any specified directory at runtime 20 | tool = DirectorySearchTool() 21 | 22 | # Alternatively, to restrict searches to a specific directory 23 | tool = DirectorySearchTool(directory='/path/to/directory') 24 | ``` 25 | 26 | ## Arguments 27 | - `directory` : This string argument specifies the directory within which to search. It is mandatory if the tool has not been initialized with a directory; otherwise, the tool will only search within the initialized directory. 28 | 29 | ## Custom model and embeddings 30 | 31 | By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows: 32 | 33 | ```python 34 | tool = DirectorySearchTool( 35 | config=dict( 36 | llm=dict( 37 | provider="ollama", # or google, openai, anthropic, llama2, ... 
38 | config=dict( 39 | model="llama2", 40 | # temperature=0.5, 41 | # top_p=1, 42 | # stream=true, 43 | ), 44 | ), 45 | embedder=dict( 46 | provider="google", 47 | config=dict( 48 | model="models/embedding-001", 49 | task_type="retrieval_document", 50 | # title="Embeddings", 51 | ), 52 | ), 53 | ) 54 | ) 55 | ``` 56 | -------------------------------------------------------------------------------- /praisonai_tools/tools/directory_search_tool/directory_search_tool.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Optional, Type 2 | 3 | from embedchain.loaders.directory_loader import DirectoryLoader 4 | from pydantic.v1 import BaseModel, Field 5 | 6 | from ..rag.rag_tool import RagTool 7 | 8 | 9 | class FixedDirectorySearchToolSchema(BaseModel): 10 | """Input for DirectorySearchTool.""" 11 | 12 | search_query: str = Field( 13 | ..., 14 | description="Mandatory search query you want to use to search the directory's content", 15 | ) 16 | 17 | 18 | class DirectorySearchToolSchema(FixedDirectorySearchToolSchema): 19 | """Input for DirectorySearchTool.""" 20 | 21 | directory: str = Field(..., description="Mandatory directory you want to search") 22 | 23 | 24 | class DirectorySearchTool(RagTool): 25 | name: str = "Search a directory's content" 26 | description: str = ( 27 | "A tool that can be used to semantic search a query from a directory's content." 28 | ) 29 | args_schema: Type[BaseModel] = DirectorySearchToolSchema 30 | 31 | def __init__(self, directory: Optional[str] = None, **kwargs): 32 | super().__init__(**kwargs) 33 | if directory is not None: 34 | self.add(directory) 35 | self.description = f"A tool that can be used to semantic search a query the {directory} directory's content." 
36 | self.args_schema = FixedDirectorySearchToolSchema 37 | self._generate_description() 38 | 39 | def add( 40 | self, 41 | *args: Any, 42 | **kwargs: Any, 43 | ) -> None: 44 | kwargs["loader"] = DirectoryLoader(config=dict(recursive=True)) 45 | super().add(*args, **kwargs) 46 | 47 | def _before_run( 48 | self, 49 | query: str, 50 | **kwargs: Any, 51 | ) -> Any: 52 | if "directory" in kwargs: 53 | self.add(kwargs["directory"]) 54 | 55 | def _run( 56 | self, 57 | search_query: str, 58 | **kwargs: Any, 59 | ) -> Any: 60 | return super()._run(query=search_query) 61 | -------------------------------------------------------------------------------- /praisonai_tools/tools/docx_search_tool/README.md: -------------------------------------------------------------------------------- 1 | # DOCXSearchTool 2 | 3 | ## Description 4 | The DOCXSearchTool is a RAG tool designed for semantic searching within DOCX documents. It enables users to effectively search and extract relevant information from DOCX files using query-based searches. This tool is invaluable for data analysis, information management, and research tasks, streamlining the process of finding specific information within large document collections. 5 | 6 | ## Installation 7 | Install the praisonai_tools package by running the following command in your terminal: 8 | 9 | ```shell 10 | pip install 'praisonai[tools]' 11 | ``` 12 | 13 | ## Example 14 | The following example demonstrates initializing the DOCXSearchTool to search within any DOCX file's content or with a specific DOCX file path. 
15 | 16 | ```python 17 | from praisonai_tools import DOCXSearchTool 18 | 19 | # Initialize the tool to search within any DOCX file's content 20 | tool = DOCXSearchTool() 21 | 22 | # OR 23 | 24 | # Initialize the tool with a specific DOCX file, so the agent can only search the content of the specified DOCX file 25 | tool = DOCXSearchTool(docx='path/to/your/document.docx') 26 | ``` 27 | 28 | ## Arguments 29 | - `docx`: An optional file path to a specific DOCX document you wish to search. If not provided during initialization, the tool allows for later specification of any DOCX file's content path for searching. 30 | 31 | ## Custom model and embeddings 32 | 33 | By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows: 34 | 35 | ```python 36 | tool = DOCXSearchTool( 37 | config=dict( 38 | llm=dict( 39 | provider="ollama", # or google, openai, anthropic, llama2, ... 40 | config=dict( 41 | model="llama2", 42 | # temperature=0.5, 43 | # top_p=1, 44 | # stream=true, 45 | ), 46 | ), 47 | embedder=dict( 48 | provider="google", 49 | config=dict( 50 | model="models/embedding-001", 51 | task_type="retrieval_document", 52 | # title="Embeddings", 53 | ), 54 | ), 55 | ) 56 | ) 57 | ``` 58 | -------------------------------------------------------------------------------- /praisonai_tools/tools/docx_search_tool/docx_search_tool.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Optional, Type 2 | 3 | from embedchain.models.data_type import DataType 4 | from pydantic.v1 import BaseModel, Field 5 | 6 | from ..rag.rag_tool import RagTool 7 | 8 | 9 | class FixedDOCXSearchToolSchema(BaseModel): 10 | """Input for DOCXSearchTool.""" 11 | docx: Optional[str] = Field(..., description="Mandatory docx path you want to search") 12 | search_query: str = Field( 13 | ..., 14 | description="Mandatory search query you want to use to search the DOCX's 
content", 15 | ) 16 | 17 | class DOCXSearchToolSchema(FixedDOCXSearchToolSchema): 18 | """Input for DOCXSearchTool.""" 19 | search_query: str = Field( 20 | ..., 21 | description="Mandatory search query you want to use to search the DOCX's content", 22 | ) 23 | 24 | class DOCXSearchTool(RagTool): 25 | name: str = "Search a DOCX's content" 26 | description: str = ( 27 | "A tool that can be used to semantic search a query from a DOCX's content." 28 | ) 29 | args_schema: Type[BaseModel] = DOCXSearchToolSchema 30 | 31 | def __init__(self, docx: Optional[str] = None, **kwargs): 32 | super().__init__(**kwargs) 33 | if docx is not None: 34 | self.add(docx) 35 | self.description = f"A tool that can be used to semantic search a query the {docx} DOCX's content." 36 | self.args_schema = FixedDOCXSearchToolSchema 37 | self._generate_description() 38 | 39 | def add( 40 | self, 41 | *args: Any, 42 | **kwargs: Any, 43 | ) -> None: 44 | kwargs["data_type"] = DataType.DOCX 45 | super().add(*args, **kwargs) 46 | 47 | def _before_run( 48 | self, 49 | query: str, 50 | **kwargs: Any, 51 | ) -> Any: 52 | if "docx" in kwargs: 53 | self.add(kwargs["docx"]) 54 | 55 | def _run( 56 | self, 57 | **kwargs: Any, 58 | ) -> Any: 59 | search_query = kwargs.get('search_query') 60 | if search_query is None: 61 | search_query = kwargs.get('query') 62 | 63 | docx = kwargs.get("docx") 64 | if docx is not None: 65 | self.add(docx) 66 | return super()._run(query=search_query) 67 | -------------------------------------------------------------------------------- /praisonai_tools/tools/exa_tools/README.md: -------------------------------------------------------------------------------- 1 | # EXASearchTool Documentation 2 | 3 | ## Description 4 | This tool is designed to perform a semantic search for a specified query from a text's content across the internet. It utilizes the `https://exa.ai/` API to fetch and display the most relevant search results based on the query provided by the user. 
5 | 6 | ## Installation 7 | To incorporate this tool into your project, follow the installation instructions below: 8 | ```shell 9 | pip install 'praisonai[tools]' 10 | ``` 11 | 12 | ## Example 13 | The following example demonstrates how to initialize the tool and execute a search with a given query: 14 | 15 | ```python 16 | from praisonai_tools import EXASearchTool 17 | 18 | # Initialize the tool for internet searching capabilities 19 | tool = EXASearchTool() 20 | ``` 21 | 22 | ## Steps to Get Started 23 | To effectively use the `EXASearchTool`, follow these steps: 24 | 25 | 1. **Package Installation**: Confirm that the `praisonai[tools]` package is installed in your Python environment. 26 | 2. **API Key Acquisition**: Acquire a `https://exa.ai/` API key by registering for a free account at `https://exa.ai/`. 27 | 3. **Environment Configuration**: Store your obtained API key in an environment variable named `EXA_API_KEY` to facilitate its use by the tool. 28 | 29 | ## Conclusion 30 | By integrating the `EXASearchTool` into Python projects, users gain the ability to conduct real-time, relevant searches across the internet directly from their applications. By adhering to the setup and usage guidelines provided, incorporating this tool into projects is streamlined and straightforward. 
31 | -------------------------------------------------------------------------------- /praisonai_tools/tools/exa_tools/exa_base_tool.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import Type 3 | from pydantic.v1 import BaseModel, Field 4 | from praisonai_tools.tools.base_tool import BaseTool 5 | 6 | class EXABaseToolToolSchema(BaseModel): 7 | """Input for EXABaseTool.""" 8 | search_query: str = Field(..., description="Mandatory search query you want to use to search the internet") 9 | 10 | class EXABaseTool(BaseTool): 11 | name: str = "Search the internet" 12 | description: str = "A tool that can be used to search the internet from a search_query" 13 | args_schema: Type[BaseModel] = EXABaseToolToolSchema 14 | search_url: str = "https://api.exa.ai/search" 15 | n_results: int = None 16 | headers: dict = { 17 | "accept": "application/json", 18 | "content-type": "application/json", 19 | } 20 | 21 | def _parse_results(self, results): 22 | stirng = [] 23 | for result in results: 24 | try: 25 | stirng.append('\n'.join([ 26 | f"Title: {result['title']}", 27 | f"Score: {result['score']}", 28 | f"Url: {result['url']}", 29 | f"ID: {result['id']}", 30 | "---" 31 | ])) 32 | except KeyError: 33 | next 34 | 35 | content = '\n'.join(stirng) 36 | return f"\nSearch results: {content}\n" 37 | -------------------------------------------------------------------------------- /praisonai_tools/tools/exa_tools/exa_search_tool.py: -------------------------------------------------------------------------------- 1 | import os 2 | import requests 3 | from typing import Any 4 | 5 | from .exa_base_tool import EXABaseTool 6 | 7 | class EXASearchTool(EXABaseTool): 8 | def _run( 9 | self, 10 | **kwargs: Any, 11 | ) -> Any: 12 | search_query = kwargs.get('search_query') 13 | if search_query is None: 14 | search_query = kwargs.get('query') 15 | 16 | payload = { 17 | "query": search_query, 18 | "type": "magic", 19 | } 20 | 21 | 
headers = self.headers.copy() 22 | headers["x-api-key"] = os.environ['EXA_API_KEY'] 23 | 24 | response = requests.post(self.search_url, json=payload, headers=headers) 25 | results = response.json() 26 | if 'results' in results: 27 | results = super()._parse_results(results['results']) 28 | return results 29 | -------------------------------------------------------------------------------- /praisonai_tools/tools/file_read_tool/README.md: -------------------------------------------------------------------------------- 1 | # FileReadTool 2 | 3 | ## Description 4 | The FileReadTool is a versatile component of the praisonai_tools package, designed to streamline the process of reading and retrieving content from files. It is particularly useful in scenarios such as batch text file processing, runtime configuration file reading, and data importation for analytics. This tool supports various text-based file formats including `.txt`, `.csv`, `.json`, and adapts its functionality based on the file type, for instance, converting JSON content into a Python dictionary for easy use. 5 | 6 | ## Installation 7 | Install the praisonai_tools package to use the FileReadTool in your projects: 8 | 9 | ```shell 10 | pip install 'praisonai[tools]' 11 | ``` 12 | 13 | ## Example 14 | To get started with the FileReadTool: 15 | 16 | ```python 17 | from praisonai_tools import FileReadTool 18 | 19 | # Initialize the tool to read any files the agents knows or lean the path for 20 | file_read_tool = FileReadTool() 21 | 22 | # OR 23 | 24 | # Initialize the tool with a specific file path, so the agent can only read the content of the specified file 25 | file_read_tool = FileReadTool(file_path='path/to/your/file.txt') 26 | ``` 27 | 28 | ## Arguments 29 | - `file_path`: The path to the file you want to read. It accepts both absolute and relative paths. Ensure the file exists and you have the necessary permissions to access it. 
-------------------------------------------------------------------------------- /praisonai_tools/tools/file_read_tool/file_read_tool.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Type, Any 2 | from pydantic.v1 import BaseModel, Field 3 | from ..base_tool import BaseTool 4 | 5 | 6 | class FixedFileReadToolSchema(BaseModel): 7 | """Input for FileReadTool.""" 8 | pass 9 | 10 | 11 | class FileReadToolSchema(FixedFileReadToolSchema): 12 | """Input for FileReadTool.""" 13 | file_path: str = Field( 14 | ..., 15 | description="Mandatory file full path to read the file" 16 | ) 17 | 18 | 19 | class FileReadTool(BaseTool): 20 | name: str = "Read a file's content" 21 | description: str = "A tool that can be used to read a file's content." 22 | args_schema: Type[BaseModel] = FileReadToolSchema 23 | file_path: Optional[str] = None 24 | 25 | def __init__( 26 | self, 27 | file_path: Optional[str] = None, 28 | **kwargs 29 | ): 30 | super().__init__(**kwargs) 31 | if file_path is not None: 32 | self.file_path = file_path 33 | self.description = f"A tool that can be used to read {file_path}'s content." 34 | self.args_schema = FixedFileReadToolSchema 35 | self._generate_description() 36 | 37 | def _run( 38 | self, 39 | **kwargs: Any, 40 | ) -> Any: 41 | try: 42 | file_path = kwargs.get('file_path', self.file_path) 43 | with open(file_path, 'r') as file: 44 | return file.read() 45 | except Exception as e: 46 | return f"Fail to read the file {file_path}. Error: {e}" 47 | -------------------------------------------------------------------------------- /praisonai_tools/tools/github_search_tool/README.md: -------------------------------------------------------------------------------- 1 | # GithubSearchTool 2 | 3 | ## Description 4 | The GithubSearchTool is a Read, Append, and Generate (RAG) tool specifically designed for conducting semantic searches within GitHub repositories. 
Utilizing advanced semantic search capabilities, it sifts through code, pull requests, issues, and repositories, making it an essential tool for developers, researchers, or anyone in need of precise information from GitHub. 5 | 6 | ## Installation 7 | To use the GithubSearchTool, first ensure the praisonai_tools package is installed in your Python environment: 8 | 9 | ```shell 10 | pip install 'praisonai[tools]' 11 | ``` 12 | 13 | This command installs the necessary package to run the GithubSearchTool along with any other tools included in the praisonai_tools package. 14 | 15 | ## Example 16 | Here’s how you can use the GithubSearchTool to perform semantic searches within a GitHub repository: 17 | ```python 18 | from praisonai_tools import GithubSearchTool 19 | 20 | # Initialize the tool for semantic searches within a specific GitHub repository 21 | tool = GithubSearchTool( 22 | gh_token='...', 23 | github_repo='https://github.com/example/repo', 24 | content_types=['code', 'issue'] # Options: code, repo, pr, issue 25 | ) 26 | 27 | # OR 28 | 29 | # Initialize the tool for semantic searches within a specific GitHub repository, so the agent can search any repository if it learns about during its execution 30 | tool = GithubSearchTool( 31 | gh_token='...', 32 | content_types=['code', 'issue'] # Options: code, repo, pr, issue 33 | ) 34 | ``` 35 | 36 | ## Arguments 37 | - `gh_token` : The GitHub token used to authenticate the search. This is a mandatory field and allows the tool to access the GitHub API for conducting searches. 38 | - `github_repo` : The URL of the GitHub repository where the search will be conducted. This is a mandatory field and specifies the target repository for your search. 39 | - `content_types` : Specifies the types of content to include in your search. 
You must provide a list of content types from the following options: `code` for searching within the code, `repo` for searching within the repository's general information, `pr` for searching within pull requests, and `issue` for searching within issues. This field is mandatory and allows tailoring the search to specific content types within the GitHub repository. 40 | 41 | ## Custom model and embeddings 42 | 43 | By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows: 44 | 45 | ```python 46 | tool = GithubSearchTool( 47 | config=dict( 48 | llm=dict( 49 | provider="ollama", # or google, openai, anthropic, llama2, ... 50 | config=dict( 51 | model="llama2", 52 | # temperature=0.5, 53 | # top_p=1, 54 | # stream=true, 55 | ), 56 | ), 57 | embedder=dict( 58 | provider="google", 59 | config=dict( 60 | model="models/embedding-001", 61 | task_type="retrieval_document", 62 | # title="Embeddings", 63 | ), 64 | ), 65 | ) 66 | ) 67 | ``` 68 | -------------------------------------------------------------------------------- /praisonai_tools/tools/github_search_tool/github_search_tool.py: -------------------------------------------------------------------------------- 1 | from typing import Any, List, Optional, Type 2 | 3 | from embedchain.loaders.github import GithubLoader 4 | from pydantic.v1 import BaseModel, Field 5 | 6 | from ..rag.rag_tool import RagTool 7 | 8 | 9 | class FixedGithubSearchToolSchema(BaseModel): 10 | """Input for GithubSearchTool.""" 11 | 12 | search_query: str = Field( 13 | ..., 14 | description="Mandatory search query you want to use to search the github repo's content", 15 | ) 16 | 17 | 18 | class GithubSearchToolSchema(FixedGithubSearchToolSchema): 19 | """Input for GithubSearchTool.""" 20 | 21 | github_repo: str = Field(..., description="Mandatory github you want to search") 22 | content_types: List[str] = Field( 23 | ..., 24 | description="Mandatory content types you 
want to be included search, options: [code, repo, pr, issue]", 25 | ) 26 | 27 | 28 | class GithubSearchTool(RagTool): 29 | name: str = "Search a github repo's content" 30 | description: str = "A tool that can be used to semantic search a query from a github repo's content. This is not the GitHub API, but instead a tool that can provide semantic search capabilities." 31 | summarize: bool = False 32 | gh_token: str 33 | args_schema: Type[BaseModel] = GithubSearchToolSchema 34 | content_types: List[str] 35 | 36 | def __init__(self, github_repo: Optional[str] = None, **kwargs): 37 | super().__init__(**kwargs) 38 | if github_repo is not None: 39 | self.add(repo=github_repo) 40 | self.description = f"A tool that can be used to semantic search a query the {github_repo} github repo's content. This is not the GitHub API, but instead a tool that can provide semantic search capabilities." 41 | self.args_schema = FixedGithubSearchToolSchema 42 | self._generate_description() 43 | 44 | def add( 45 | self, 46 | repo: str, 47 | content_types: List[str] | None = None, 48 | **kwargs: Any, 49 | ) -> None: 50 | content_types = content_types or self.content_types 51 | 52 | kwargs["data_type"] = "github" 53 | kwargs["loader"] = GithubLoader(config={"token": self.gh_token}) 54 | super().add(f"repo:{repo} type:{','.join(content_types)}", **kwargs) 55 | 56 | def _before_run( 57 | self, 58 | query: str, 59 | **kwargs: Any, 60 | ) -> Any: 61 | if "github_repo" in kwargs: 62 | self.add( 63 | repo=kwargs["github_repo"], content_types=kwargs.get("content_types") 64 | ) 65 | 66 | def _run( 67 | self, 68 | search_query: str, 69 | **kwargs: Any, 70 | ) -> Any: 71 | return super()._run(query=search_query) 72 | -------------------------------------------------------------------------------- /praisonai_tools/tools/json_search_tool/README.md: -------------------------------------------------------------------------------- 1 | # JSONSearchTool 2 | 3 | ## Description 4 | This tool is used to perform a 
RAG search within a JSON file's content. It allows users to initiate a search with a specific JSON path, focusing the search operation within that particular JSON file. If the path is provided at initialization, the tool restricts its search scope to the specified JSON file, thereby enhancing the precision of search results. 5 | 6 | ## Installation 7 | Install the praisonai_tools package by executing the following command in your terminal: 8 | 9 | ```shell 10 | pip install 'praisonai[tools]' 11 | ``` 12 | 13 | ## Example 14 | Below are examples demonstrating how to use the JSONSearchTool for searching within JSON files. You can either search any JSON content or restrict the search to a specific JSON file. 15 | 16 | ```python 17 | from praisonai_tools import JSONSearchTool 18 | 19 | # Example 1: Initialize the tool for a general search across any JSON content. This is useful when the path is known or can be discovered during execution. 20 | tool = JSONSearchTool() 21 | 22 | # Example 2: Initialize the tool with a specific JSON path, limiting the search to a particular JSON file. 23 | tool = JSONSearchTool(json_path='./path/to/your/file.json') 24 | ``` 25 | 26 | ## Arguments 27 | - `json_path` (str): An optional argument that defines the path to the JSON file to be searched. This parameter is only necessary if the tool is initialized without a specific JSON path. Providing this argument restricts the search to the specified JSON file. 28 | 29 | ## Custom model and embeddings 30 | 31 | By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows: 32 | 33 | ```python 34 | tool = JSONSearchTool( 35 | config=dict( 36 | llm=dict( 37 | provider="ollama", # or google, openai, anthropic, llama2, ... 
38 | config=dict( 39 | model="llama2", 40 | # temperature=0.5, 41 | # top_p=1, 42 | # stream=true, 43 | ), 44 | ), 45 | embedder=dict( 46 | provider="google", 47 | config=dict( 48 | model="models/embedding-001", 49 | task_type="retrieval_document", 50 | # title="Embeddings", 51 | ), 52 | ), 53 | ) 54 | ) 55 | ``` 56 | -------------------------------------------------------------------------------- /praisonai_tools/tools/json_search_tool/json_search_tool.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Optional, Type 2 | 3 | from embedchain.models.data_type import DataType 4 | from pydantic.v1 import BaseModel, Field 5 | 6 | from ..rag.rag_tool import RagTool 7 | 8 | 9 | class FixedJSONSearchToolSchema(BaseModel): 10 | """Input for JSONSearchTool.""" 11 | 12 | search_query: str = Field( 13 | ..., 14 | description="Mandatory search query you want to use to search the JSON's content", 15 | ) 16 | 17 | 18 | class JSONSearchToolSchema(FixedJSONSearchToolSchema): 19 | """Input for JSONSearchTool.""" 20 | 21 | json_path: str = Field(..., description="Mandatory json path you want to search") 22 | 23 | 24 | class JSONSearchTool(RagTool): 25 | name: str = "Search a JSON's content" 26 | description: str = ( 27 | "A tool that can be used to semantic search a query from a JSON's content." 28 | ) 29 | args_schema: Type[BaseModel] = JSONSearchToolSchema 30 | 31 | def __init__(self, json_path: Optional[str] = None, **kwargs): 32 | super().__init__(**kwargs) 33 | if json_path is not None: 34 | self.add(json_path) 35 | self.description = f"A tool that can be used to semantic search a query the {json_path} JSON's content." 
36 | self.args_schema = FixedJSONSearchToolSchema 37 | self._generate_description() 38 | 39 | def add( 40 | self, 41 | *args: Any, 42 | **kwargs: Any, 43 | ) -> None: 44 | kwargs["data_type"] = DataType.JSON 45 | super().add(*args, **kwargs) 46 | 47 | def _before_run( 48 | self, 49 | query: str, 50 | **kwargs: Any, 51 | ) -> Any: 52 | if "json_path" in kwargs: 53 | self.add(kwargs["json_path"]) 54 | 55 | def _run( 56 | self, 57 | search_query: str, 58 | **kwargs: Any, 59 | ) -> Any: 60 | return super()._run(query=search_query) 61 | -------------------------------------------------------------------------------- /praisonai_tools/tools/llamaindex_tool/README.md: -------------------------------------------------------------------------------- 1 | # LlamaIndexTool Documentation 2 | 3 | ## Description 4 | This tool is designed to be a general wrapper around LlamaIndex tools and query engines, enabling you to leverage LlamaIndex resources 5 | in terms of RAG/agentic pipelines as tools to plug into praisonai agents. 6 | 7 | ## Installation 8 | To incorporate this tool into your project, follow the installation instructions below: 9 | ```shell 10 | pip install 'praisonai[tools]' 11 | ``` 12 | 13 | ## Example 14 | The following example demonstrates how to initialize the tool and execute a search with a given query: 15 | 16 | ```python 17 | from praisonai_tools import LlamaIndexTool 18 | 19 | # Initialize the tool from a LlamaIndex Tool 20 | 21 | ## Example 1: Initialize from FunctionTool 22 | from llama_index.core.tools import FunctionTool 23 | 24 | your_python_function = lambda ...: ... 
25 | og_tool = FunctionTool.from_defaults(your_python_function, name="", description='') 26 | tool = LlamaIndexTool.from_tool(og_tool) 27 | 28 | ## Example 2: Initialize from LlamaHub Tools 29 | from llama_index.tools.wolfram_alpha import WolframAlphaToolSpec 30 | wolfram_spec = WolframAlphaToolSpec(app_id="") 31 | wolfram_tools = wolfram_spec.to_tool_list() 32 | tools = [LlamaIndexTool.from_tool(t) for t in wolfram_tools] 33 | 34 | 35 | # Initialize Tool from a LlamaIndex Query Engine 36 | 37 | ## NOTE: LlamaIndex has a lot of query engines, define whatever query engine you want 38 | query_engine = index.as_query_engine() 39 | query_tool = LlamaIndexTool.from_query_engine( 40 | query_engine, 41 | name="Uber 2019 10K Query Tool", 42 | description="Use this tool to lookup the 2019 Uber 10K Annual Report" 43 | ) 44 | 45 | ``` 46 | 47 | ## Steps to Get Started 48 | To effectively use the `LlamaIndexTool`, follow these steps: 49 | 50 | 1. **Install praisonai**: Confirm that the `praisonai[tools]` package is installed in your Python environment. 51 | 2. **Install and use LlamaIndex**: Follow LlamaIndex documentation (https://docs.llamaindex.ai/) to setup a RAG/agent pipeline. 
52 | 53 | 54 | -------------------------------------------------------------------------------- /praisonai_tools/tools/llamaindex_tool/llamaindex_tool.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import requests 4 | 5 | from typing import Type, Any, cast, Optional 6 | from pydantic.v1 import BaseModel, Field 7 | from praisonai_tools.tools.base_tool import BaseTool 8 | 9 | class LlamaIndexTool(BaseTool): 10 | """Tool to wrap LlamaIndex tools/query engines.""" 11 | llama_index_tool: Any 12 | 13 | def _run( 14 | self, 15 | *args: Any, 16 | **kwargs: Any, 17 | ) -> Any: 18 | """Run tool.""" 19 | from llama_index.core.tools import BaseTool as LlamaBaseTool 20 | tool = cast(LlamaBaseTool, self.llama_index_tool) 21 | return tool(*args, **kwargs) 22 | 23 | @classmethod 24 | def from_tool( 25 | cls, 26 | tool: Any, 27 | **kwargs: Any 28 | ) -> "LlamaIndexTool": 29 | from llama_index.core.tools import BaseTool as LlamaBaseTool 30 | 31 | if not isinstance(tool, LlamaBaseTool): 32 | raise ValueError(f"Expected a LlamaBaseTool, got {type(tool)}") 33 | tool = cast(LlamaBaseTool, tool) 34 | 35 | if tool.metadata.fn_schema is None: 36 | raise ValueError("The LlamaIndex tool does not have an fn_schema specified.") 37 | args_schema = cast(Type[BaseModel], tool.metadata.fn_schema) 38 | 39 | return cls( 40 | name=tool.metadata.name, 41 | description=tool.metadata.description, 42 | args_schema=args_schema, 43 | llama_index_tool=tool, 44 | **kwargs 45 | ) 46 | 47 | 48 | @classmethod 49 | def from_query_engine( 50 | cls, 51 | query_engine: Any, 52 | name: Optional[str] = None, 53 | description: Optional[str] = None, 54 | return_direct: bool = False, 55 | **kwargs: Any 56 | ) -> "LlamaIndexTool": 57 | from llama_index.core.query_engine import BaseQueryEngine 58 | from llama_index.core.tools import QueryEngineTool 59 | 60 | if not isinstance(query_engine, BaseQueryEngine): 61 | raise ValueError(f"Expected a 
BaseQueryEngine, got {type(query_engine)}") 62 | 63 | # NOTE: by default the schema expects an `input` variable. However this 64 | # confuses praisonai so we are renaming to `query`. 65 | class QueryToolSchema(BaseModel): 66 | """Schema for query tool.""" 67 | query: str = Field(..., description="Search query for the query tool.") 68 | 69 | # NOTE: setting `resolve_input_errors` to True is important because the schema expects `input` but we are using `query` 70 | query_engine_tool = QueryEngineTool.from_defaults( 71 | query_engine, 72 | name=name, 73 | description=description, 74 | return_direct=return_direct, 75 | resolve_input_errors=True, 76 | ) 77 | # HACK: we are replacing the schema with our custom schema 78 | query_engine_tool.metadata.fn_schema = QueryToolSchema 79 | 80 | return cls.from_tool( 81 | query_engine_tool, 82 | **kwargs 83 | ) 84 | -------------------------------------------------------------------------------- /praisonai_tools/tools/mdx_seach_tool/README.md: -------------------------------------------------------------------------------- 1 | # MDXSearchTool 2 | 3 | ## Description 4 | The MDX Search Tool, a key component of the `praisonai_tools` package, is designed for advanced market data extraction, offering invaluable support to researchers and analysts requiring immediate market insights in the AI sector. With its ability to interface with various data sources and tools, it streamlines the process of acquiring, reading, and organizing market data efficiently. 5 | 6 | ## Installation 7 | To utilize the MDX Search Tool, ensure the `praisonai_tools` package is installed. If not already present, install it using the following command: 8 | 9 | ```shell 10 | pip install 'praisonai[tools]' 11 | ``` 12 | 13 | ## Example 14 | Configuring and using the MDX Search Tool involves setting up environment variables and utilizing the tool within a praisonai project for market research. 
from typing import Any, Optional, Type

from embedchain.models.data_type import DataType
from pydantic.v1 import BaseModel, Field

from ..rag.rag_tool import RagTool


class FixedMDXSearchToolSchema(BaseModel):
    """Input for MDXSearchTool when the MDX file is fixed at construction."""

    search_query: str = Field(
        ...,
        description="Mandatory search query you want to use to search the MDX's content",
    )


class MDXSearchToolSchema(FixedMDXSearchToolSchema):
    """Input for MDXSearchTool."""

    mdx: str = Field(..., description="Mandatory mdx path you want to search")


class MDXSearchTool(RagTool):
    """Semantic (RAG) search over the content of an MDX file."""

    name: str = "Search a MDX's content"
    description: str = (
        "A tool that can be used to semantic search a query from a MDX's content."
    )
    args_schema: Type[BaseModel] = MDXSearchToolSchema

    def __init__(self, mdx: Optional[str] = None, **kwargs):
        """If *mdx* is given, index it now and lock the tool to that file."""
        super().__init__(**kwargs)
        if mdx is not None:
            self.add(mdx)
            self.description = f"A tool that can be used to semantic search a query the {mdx} MDX's content."
            self.args_schema = FixedMDXSearchToolSchema
            self._generate_description()

    def add(
        self,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """Index an MDX source into the knowledge base."""
        kwargs["data_type"] = DataType.MDX
        super().add(*args, **kwargs)

    def _before_run(
        self,
        query: str,
        **kwargs: Any,
    ) -> Any:
        # Dynamic-schema mode: index an MDX path supplied at run time.
        if "mdx" in kwargs:
            self.add(kwargs["mdx"])

    def _run(
        self,
        search_query: str,
        **kwargs: Any,
    ) -> Any:
        # BUGFIX: forward **kwargs so a run-time `mdx` argument reaches
        # _before_run (previously the kwargs were silently dropped, making
        # the `mdx` field of MDXSearchToolSchema dead).
        return super()._run(query=search_query, **kwargs)
5 | 6 | ## Installation 7 | To get started with the PDFSearchTool, first, ensure the praisonai_tools package is installed with the following command: 8 | 9 | ```shell 10 | pip install 'praisonai[tools]' 11 | ``` 12 | 13 | ## Example 14 | Here's how to use the PDFSearchTool to search within a PDF document: 15 | 16 | ```python 17 | from praisonai_tools import PDFSearchTool 18 | 19 | # Initialize the tool allowing for any PDF content search if the path is provided during execution 20 | tool = PDFSearchTool() 21 | 22 | # OR 23 | 24 | # Initialize the tool with a specific PDF path for exclusive search within that document 25 | tool = PDFSearchTool(pdf='path/to/your/document.pdf') 26 | ``` 27 | 28 | ## Arguments 29 | - `pdf`: **Optinal** The PDF path for the search. Can be provided at initialization or within the `run` method's arguments. If provided at initialization, the tool confines its search to the specified document. 30 | 31 | ## Custom model and embeddings 32 | 33 | By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows: 34 | 35 | ```python 36 | tool = PDFSearchTool( 37 | config=dict( 38 | llm=dict( 39 | provider="ollama", # or google, openai, anthropic, llama2, ... 
from typing import Any, Optional, Type

from embedchain.models.data_type import DataType
from pydantic.v1 import BaseModel, Field

from ..rag.rag_tool import RagTool


class FixedPDFSearchToolSchema(BaseModel):
    """Input for PDFSearchTool."""

    query: str = Field(
        ..., description="Mandatory query you want to use to search the PDF's content"
    )


class PDFSearchToolSchema(FixedPDFSearchToolSchema):
    """Input for PDFSearchTool."""

    pdf: str = Field(..., description="Mandatory pdf path you want to search")


class PDFSearchTool(RagTool):
    """RAG tool that answers semantic queries against PDF content."""

    name: str = "Search a PDF's content"
    description: str = (
        "A tool that can be used to semantic search a query from a PDF's content."
    )
    args_schema: Type[BaseModel] = PDFSearchToolSchema

    def __init__(self, pdf: Optional[str] = None, **kwargs):
        """Create the tool; optionally pre-index a single PDF.

        When *pdf* is supplied, the document is indexed immediately and the
        argument schema is narrowed so callers only pass a query.
        """
        super().__init__(**kwargs)
        if pdf is None:
            return
        # Fixed-document mode: index now and drop `pdf` from the schema.
        self.add(pdf)
        self.description = f"A tool that can be used to semantic search a query the {pdf} PDF's content."
        self.args_schema = FixedPDFSearchToolSchema
        self._generate_description()

    def add(
        self,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """Index a PDF source into the knowledge base."""
        kwargs["data_type"] = DataType.PDF_FILE
        super().add(*args, **kwargs)

    def _before_run(
        self,
        query: str,
        **kwargs: Any,
    ) -> Any:
        # Dynamic mode: index a PDF path supplied alongside the query.
        if "pdf" in kwargs:
            self.add(kwargs["pdf"])
23 | args_schema: Type[BaseModel] = PDFTextWritingToolSchema 24 | 25 | def run(self, pdf_path: str, text: str, position: tuple, font_size: int, font_color: str, 26 | font_name: str = "F1", font_file: Optional[str] = None, page_number: int = 0, **kwargs) -> str: 27 | reader = PdfReader(pdf_path) 28 | writer = PdfWriter() 29 | 30 | if page_number >= len(reader.pages): 31 | return "Page number out of range." 32 | 33 | page: PageObject = reader.pages[page_number] 34 | content = ContentStream(page["/Contents"].data, reader) 35 | 36 | if font_file: 37 | # Check if the font file exists 38 | if not Path(font_file).exists(): 39 | return "Font file does not exist." 40 | 41 | # Embed the custom font 42 | font_name = self.embed_font(writer, font_file) 43 | 44 | # Prepare text operation with the custom or standard font 45 | x_position, y_position = position 46 | text_operation = f"BT /{font_name} {font_size} Tf {x_position} {y_position} Td ({text}) Tj ET" 47 | content.operations.append([font_color]) # Set color 48 | content.operations.append([text_operation]) # Add text 49 | 50 | # Replace old content with new content 51 | page[NameObject("/Contents")] = content 52 | writer.add_page(page) 53 | 54 | # Save the new PDF 55 | output_pdf_path = "modified_output.pdf" 56 | with open(output_pdf_path, "wb") as out_file: 57 | writer.write(out_file) 58 | 59 | return f"Text added to {output_pdf_path} successfully." 
60 | 61 | def embed_font(self, writer: PdfWriter, font_file: str) -> str: 62 | """Embeds a TTF font into the PDF and returns the font name.""" 63 | with open(font_file, "rb") as file: 64 | font = Font.true_type(file.read()) 65 | font_ref = writer.add_object(font) 66 | return font_ref -------------------------------------------------------------------------------- /praisonai_tools/tools/pg_seach_tool/README.md: -------------------------------------------------------------------------------- 1 | # PGSearchTool 2 | 3 | ## Description 4 | This tool is designed to facilitate semantic searches within PostgreSQL database tables. Leveraging the RAG (Retrieve and Generate) technology, the PGSearchTool provides users with an efficient means of querying database table content, specifically tailored for PostgreSQL databases. It simplifies the process of finding relevant data through semantic search queries, making it an invaluable resource for users needing to perform advanced queries on extensive datasets within a PostgreSQL database. 5 | 6 | ## Installation 7 | To install the `praisonai_tools` package and utilize the PGSearchTool, execute the following command in your terminal: 8 | 9 | ```shell 10 | pip install 'praisonai[tools]' 11 | ``` 12 | 13 | ## Example 14 | Below is an example showcasing how to use the PGSearchTool to conduct a semantic search on a table within a PostgreSQL database: 15 | 16 | ```python 17 | from praisonai_tools import PGSearchTool 18 | 19 | # Initialize the tool with the database URI and the target table name 20 | tool = PGSearchTool(db_uri='postgresql://user:password@localhost:5432/mydatabase', table_name='employees') 21 | 22 | ``` 23 | 24 | ## Arguments 25 | The PGSearchTool requires the following arguments for its operation: 26 | 27 | - `db_uri`: A string representing the URI of the PostgreSQL database to be queried. This argument is mandatory and must include the necessary authentication details and the location of the database. 
from typing import Any, Type

from embedchain.loaders.postgres import PostgresLoader
from pydantic.v1 import BaseModel, Field

from ..rag.rag_tool import RagTool


class PGSearchToolSchema(BaseModel):
    """Input for PGSearchTool."""

    search_query: str = Field(
        ...,
        description="Mandatory semantic search query you want to use to search the database's content",
    )


class PGSearchTool(RagTool):
    # RAG tool that indexes a single PostgreSQL table and answers semantic
    # queries against its rows.
    name: str = "Search a database's table content"
    description: str = "A tool that can be used to semantic search a query from a database table's content."
    args_schema: Type[BaseModel] = PGSearchToolSchema
    # Connection string consumed by PostgresLoader in add(); required at init
    # (passed through **kwargs to the pydantic constructor).
    db_uri: str = Field(..., description="Mandatory database URI")

    def __init__(self, table_name: str, **kwargs):
        # Order matters: super().__init__ must run first so pydantic sets
        # self.db_uri before self.add() reads it.
        super().__init__(**kwargs)
        self.add(table_name)
        self.description = f"A tool that can be used to semantic search a query the {table_name} database table's content."
        self._generate_description()

    def add(
        self,
        table_name: str,
        **kwargs: Any,
    ) -> None:
        """Load every row of *table_name* into the knowledge base."""
        kwargs["data_type"] = "postgres"
        kwargs["loader"] = PostgresLoader(config=dict(url=self.db_uri))
        # NOTE(review): table_name is interpolated into SQL unescaped. It is
        # operator-supplied configuration here, but must never come from
        # end users (SQL injection risk).
        super().add(f"SELECT * FROM {table_name};", **kwargs)

    def _run(
        self,
        search_query: str,
        **kwargs: Any,
    ) -> Any:
        # Map the schema's `search_query` onto RagTool._run's `query` param.
        return super()._run(query=search_query)
## **Usage**

RagTool can be instantiated with data from different sources, including:

- 📰 PDF file
- 📊 CSV file
- 📃 JSON file
- 📝 Text
- 📁 Directory / Folder
- 🌐 HTML Web page
- 📽️ Youtube Channel
- 📺 Youtube Video
- 📚 Docs website
- 📝 MDX file
- 📄 DOCX file
- 🧾 XML file
- 📬 Gmail
- 📝 Github
- 🐘 Postgres
- 🐬 MySQL
- 🤖 Slack
- 💬 Discord
- 🗨️ Discourse
- 📝 Substack
- 🐝 Beehiiv
- 💾 Dropbox
- 🖼️ Image
- ⚙️ Custom

#### **Creating an Instance**

```python
from praisonai_tools.tools.rag.rag_tool import RagTool

# Example: Loading from a file
rag_tool = RagTool().from_file('path/to/your/file.txt')

# Example: Loading from a directory
rag_tool = RagTool().from_directory('path/to/your/directory')

# Example: Loading from a web page
rag_tool = RagTool().from_web_page('https://example.com')
```

## **Contribution**

Contributions to RagTool and the broader praisonai tools ecosystem are welcome. To contribute, please follow the standard GitHub workflow: fork the repository, make your changes, and submit a pull request.

## **License**

RagTool is open-source and available under the MIT license.

Thank you for considering RagTool for your knowledge base needs. Your contributions and feedback are invaluable to making RagTool even better.
from abc import ABC, abstractmethod
from typing import Any

from pydantic import BaseModel, Field, model_validator

from praisonai_tools.tools.base_tool import BaseTool


class Adapter(BaseModel, ABC):
    """Abstract interface between RagTool and a concrete knowledge-base backend."""

    class Config:
        # Backend handles (e.g. an embedchain App) are not pydantic types.
        arbitrary_types_allowed = True

    @abstractmethod
    def query(self, question: str) -> str:
        """Query the knowledge base with a question and return the answer."""

    @abstractmethod
    def add(
        self,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """Add content to the knowledge base."""


class RagTool(BaseTool):
    """Tool that answers questions from a pluggable knowledge base (RAG)."""

    class _AdapterPlaceholder(Adapter):
        # Sentinel adapter: gives the `adapter` field a default while deferring
        # the embedchain import until the model validator below swaps it out.
        def query(self, question: str) -> str:
            raise NotImplementedError

        def add(self, *args: Any, **kwargs: Any) -> None:
            raise NotImplementedError

    name: str = "Knowledge base"
    description: str = "A knowledge base that can be used to answer questions."
    summarize: bool = False
    adapter: Adapter = Field(default_factory=_AdapterPlaceholder)
    config: dict[str, Any] | None = None

    @model_validator(mode="after")
    def _set_default_adapter(self):
        # Replace the placeholder with a real embedchain-backed adapter. The
        # imports are lazy so embedchain is only required when the caller did
        # not inject a custom adapter.
        if isinstance(self.adapter, RagTool._AdapterPlaceholder):
            from embedchain import App

            from praisonai_tools.adapters.embedchain_adapter import EmbedchainAdapter

            app = App.from_config(config=self.config) if self.config else App()
            self.adapter = EmbedchainAdapter(
                embedchain_app=app, summarize=self.summarize
            )

        return self

    def add(
        self,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """Add content to the underlying knowledge base."""
        self.adapter.add(*args, **kwargs)

    def _run(
        self,
        query: str,
        **kwargs: Any,
    ) -> Any:
        # Give subclasses a chance to react (e.g. index a path passed at
        # run time) before querying.
        self._before_run(query, **kwargs)

        return f"Relevant Content:\n{self.adapter.query(query)}"

    def _before_run(self, query, **kwargs):
        # Hook for subclasses; intentionally a no-op here.
        pass
import os
import requests
from bs4 import BeautifulSoup
from typing import Optional, Type, Any
from pydantic.v1 import BaseModel, Field
from ..base_tool import BaseTool

class FixedScrapeElementFromWebsiteToolSchema(BaseModel):
    """Input for ScrapeElementFromWebsiteTool when the target is fixed."""
    pass

class ScrapeElementFromWebsiteToolSchema(FixedScrapeElementFromWebsiteToolSchema):
    """Input for ScrapeElementFromWebsiteTool."""
    website_url: str = Field(..., description="Mandatory website url to read the file")
    css_element: str = Field(..., description="Mandatory css reference for element to scrape from the website")

class ScrapeElementFromWebsiteTool(BaseTool):
    """Scrape the text of elements matching a CSS selector from a web page."""

    name: str = "Read a website content"
    description: str = "A tool that can be used to read a website content."
    args_schema: Type[BaseModel] = ScrapeElementFromWebsiteToolSchema
    website_url: Optional[str] = None
    cookies: Optional[dict] = None
    css_element: Optional[str] = None
    # Browser-like headers to reduce the chance of being blocked.
    headers: Optional[dict] = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Language': 'en-US,en;q=0.9',
        'Referer': 'https://www.google.com/',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Accept-Encoding': 'gzip, deflate, br'
    }

    def __init__(self, website_url: Optional[str] = None, cookies: Optional[dict] = None, css_element: Optional[str] = None, **kwargs):
        """Optionally fix the target URL/selector at construction time.

        When *website_url* is given, the argument schema is narrowed so the
        agent does not need to supply it at run time.
        """
        super().__init__(**kwargs)
        if website_url is not None:
            self.website_url = website_url
            self.css_element = css_element
            self.description = f"A tool that can be used to read {website_url}'s content."
            self.args_schema = FixedScrapeElementFromWebsiteToolSchema
            self._generate_description()
        if cookies is not None:
            # Cookie value is read from the environment to keep secrets out of code.
            self.cookies = {cookies["name"]: os.getenv(cookies["value"])}

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:
        """Fetch the page and return the matched elements' text, newline-joined."""
        website_url = kwargs.get('website_url', self.website_url)
        css_element = kwargs.get('css_element', self.css_element)
        # BUGFIX: add a timeout (matching ScrapeWebsiteTool) so a stalled
        # server cannot hang the agent indefinitely.
        page = requests.get(
            website_url,
            timeout=15,
            headers=self.headers,
            cookies=self.cookies if self.cookies else {}
        )
        parsed = BeautifulSoup(page.content, "html.parser")
        elements = parsed.select(css_element)
        return "\n".join([element.get_text() for element in elements])
import os
import requests
from bs4 import BeautifulSoup
from typing import Optional, Type, Any
from pydantic.v1 import BaseModel, Field
from ..base_tool import BaseTool

class FixedScrapeWebsiteToolSchema(BaseModel):
    """Input for ScrapeWebsiteTool when the URL is fixed at construction."""
    pass

class ScrapeWebsiteToolSchema(FixedScrapeWebsiteToolSchema):
    """Input for ScrapeWebsiteTool."""
    website_url: str = Field(..., description="Mandatory website url to read the file")

class ScrapeWebsiteTool(BaseTool):
    """Fetch a web page and return its visible text, whitespace-compacted."""

    name: str = "Read website content"
    description: str = "A tool that can be used to read a website content."
    args_schema: Type[BaseModel] = ScrapeWebsiteToolSchema
    website_url: Optional[str] = None
    cookies: Optional[dict] = None
    # Browser-like headers to avoid trivial bot blocking.
    headers: Optional[dict] = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'Accept-Language': 'en-US,en;q=0.9',
        'Referer': 'https://www.google.com/',
        'Connection': 'keep-alive',
        'Upgrade-Insecure-Requests': '1',
        'Accept-Encoding': 'gzip, deflate, br'
    }

    def __init__(self, website_url: Optional[str] = None, cookies: Optional[dict] = None, **kwargs):
        """Optionally pin the tool to a single URL and narrow its schema."""
        super().__init__(**kwargs)
        if website_url is not None:
            self.website_url = website_url
            self.description = f"A tool that can be used to read {website_url}'s content."
            self.args_schema = FixedScrapeWebsiteToolSchema
            self._generate_description()
        if cookies is not None:
            # Cookie value is looked up in the environment, not stored inline.
            self.cookies = {cookies["name"]: os.getenv(cookies["value"])}

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:
        """Download the page and return its text with blank lines and extra spaces removed."""
        target = kwargs.get('website_url', self.website_url)
        response = requests.get(
            target,
            timeout=15,
            headers=self.headers,
            cookies=self.cookies if self.cookies else {}
        )
        soup = BeautifulSoup(response.content, "html.parser")
        raw_text = soup.get_text()
        # First drop blank lines, then collapse runs of spaces.
        non_blank_lines = [chunk for chunk in raw_text.split('\n') if chunk.strip() != '']
        joined = '\n'.join(non_blank_lines)
        words = [chunk for chunk in joined.split(' ') if chunk.strip() != '']
        return ' '.join(words)
5 | 6 | ## Installation 7 | Install the praisonai_tools package 8 | ``` 9 | pip install 'praisonai[tools]' 10 | ``` 11 | 12 | ## Example 13 | ```python 14 | from praisonai_tools import SeleniumScrapingTool 15 | 16 | # Example 1: Scrape any website it finds during its execution 17 | tool = SeleniumScrapingTool() 18 | 19 | # Example 2: Scrape the entire webpage 20 | tool = SeleniumScrapingTool(website_url='https://example.com') 21 | 22 | # Example 3: Scrape a specific CSS element from the webpage 23 | tool = SeleniumScrapingTool(website_url='https://example.com', css_element='.main-content') 24 | 25 | # Example 4: Scrape using optional parameters for customized scraping 26 | tool = SeleniumScrapingTool(website_url='https://example.com', css_element='.main-content', cookie={'name': 'user', 'value': 'John Doe'}) 27 | ``` 28 | 29 | ## Arguments 30 | - `website_url`: Mandatory. The URL of the website to scrape. 31 | - `css_element`: Mandatory. The CSS selector for a specific element to scrape from the website. 32 | - `cookie`: Optional. A dictionary containing cookie information. This parameter allows the tool to simulate a session with cookie information, providing access to content that may be restricted to logged-in users. 33 | - `wait_time`: Optional. The number of seconds the tool waits after loading the website and after setting a cookie, before scraping the content. This allows for dynamic content to load properly. 
from typing import Optional, Type, Any
import time
from pydantic.v1 import BaseModel, Field

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
# BUGFIX: use Chrome Options to match the default Chrome driver class
# (previously Firefox's Options were imported and passed to Chrome).
from selenium.webdriver.chrome.options import Options

from ..base_tool import BaseTool

class FixedSeleniumScrapingToolSchema(BaseModel):
    """Input for SeleniumScrapingTool when the target is fixed at construction."""
    pass

class SeleniumScrapingToolSchema(FixedSeleniumScrapingToolSchema):
    """Input for SeleniumScrapingTool."""
    website_url: str = Field(..., description="Mandatory website url to read the file")
    css_element: str = Field(..., description="Mandatory css reference for element to scrape from the website")

class SeleniumScrapingTool(BaseTool):
    """Scrape page text with a headless browser (handles JS-rendered content)."""

    name: str = "Read a website content"
    description: str = "A tool that can be used to read a website content."
    args_schema: Type[BaseModel] = SeleniumScrapingToolSchema
    website_url: Optional[str] = None
    # WebDriver *class* used to create a fresh browser for each run.
    driver: Optional[Any] = webdriver.Chrome
    cookie: Optional[dict] = None
    # Seconds to wait after each page load so dynamic content can render.
    wait_time: Optional[int] = 3
    css_element: Optional[str] = None

    def __init__(self, website_url: Optional[str] = None, cookie: Optional[dict] = None, css_element: Optional[str] = None, **kwargs):
        """Optionally fix the target URL/selector and cookie at construction."""
        super().__init__(**kwargs)
        if cookie is not None:
            self.cookie = cookie

        if css_element is not None:
            self.css_element = css_element

        if website_url is not None:
            self.website_url = website_url
            self.description = f"A tool that can be used to read {website_url}'s content."
            self.args_schema = FixedSeleniumScrapingToolSchema

        self._generate_description()

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:
        """Load the page in a headless browser and return the scraped text."""
        website_url = kwargs.get('website_url', self.website_url)
        css_element = kwargs.get('css_element', self.css_element)
        driver = self._create_driver(website_url, self.cookie, self.wait_time)

        content = []
        if css_element is None or css_element.strip() == "":
            # No selector: grab the whole rendered body text.
            body_text = driver.find_element(By.TAG_NAME, "body").text
            content.append(body_text)
        else:
            for element in driver.find_elements(By.CSS_SELECTOR, css_element):
                content.append(element.text)
        driver.close()
        return "\n".join(content)

    def _create_driver(self, url, cookie, wait_time):
        """Start a headless browser and load *url* (reloaded once if a cookie is set)."""
        options = Options()
        options.add_argument("--headless")
        driver = self.driver(options=options)
        driver.get(url)
        time.sleep(wait_time)
        if cookie:
            driver.add_cookie(cookie)
            time.sleep(wait_time)
            # Reload so the page is served with the cookie applied.
            driver.get(url)
            time.sleep(wait_time)
        return driver

    def close(self):
        """Backward-compatible no-op.

        Each `_run` creates and closes its own driver, and `self.driver`
        holds the WebDriver *class* rather than an instance — so the previous
        implementation's `self.driver.close()` always raised. There is
        nothing to close here.
        """
5 | 6 | ## Installation 7 | To incorporate this tool into your project, follow the installation instructions below: 8 | ```shell 9 | pip install 'praisonai[tools]' 10 | ``` 11 | 12 | ## Example 13 | The following example demonstrates how to initialize the tool and execute a search with a given query: 14 | 15 | ```python 16 | from praisonai_tools import SerperDevTool 17 | 18 | # Initialize the tool for internet searching capabilities 19 | tool = SerperDevTool() 20 | ``` 21 | 22 | ## Steps to Get Started 23 | To effectively use the `SerperDevTool`, follow these steps: 24 | 25 | 1. **Package Installation**: Confirm that the `praisonai[tools]` package is installed in your Python environment. 26 | 2. **API Key Acquisition**: Acquire a `serper.dev` API key by registering for a free account at `serper.dev`. 27 | 3. **Environment Configuration**: Store your obtained API key in an environment variable named `SERPER_API_KEY` to facilitate its use by the tool. 28 | 29 | ## Conclusion 30 | By integrating the `SerperDevTool` into Python projects, users gain the ability to conduct real-time, relevant searches across the internet directly from their applications. By adhering to the setup and usage guidelines provided, incorporating this tool into projects is streamlined and straightforward. 
import os
import json
import requests

from typing import Type, Any
from pydantic.v1 import BaseModel, Field
from praisonai_tools.tools.base_tool import BaseTool


class SerperDevToolSchema(BaseModel):
    """Input for SerperDevTool."""
    search_query: str = Field(..., description="Mandatory search query you want to use to search the internet")


class SerperDevTool(BaseTool):
    """Search the internet through the serper.dev Google Search API.

    Reads the API key from the SERPER_API_KEY environment variable and
    returns the organic results formatted as a readable string.
    """
    name: str = "Search the internet"
    description: str = "A tool that can be used to search the internet with a search_query."
    args_schema: Type[BaseModel] = SerperDevToolSchema
    search_url: str = "https://google.serper.dev/search"
    # Cap on how many organic results are included in the formatted output.
    n_results: int = 10

    def _run(
        self,
        **kwargs: Any,
    ) -> Any:
        """Execute the search and format the results.

        Accepts the query under 'search_query' (the schema name) or 'query'
        (a common alias some agents emit). Returns a formatted string of up
        to n_results organic hits, or the raw JSON payload when the response
        has no 'organic' section.
        """
        search_query = kwargs.get('search_query')
        if search_query is None:
            search_query = kwargs.get('query')

        payload = json.dumps({"q": search_query})
        headers = {
            'X-API-KEY': os.environ['SERPER_API_KEY'],  # KeyError here means the env var is unset
            'content-type': 'application/json'
        }
        response = requests.request("POST", self.search_url, headers=headers, data=payload)
        results = response.json()
        if 'organic' in results:
            # Honour the configured result cap (the field was previously
            # declared but never used).
            results = results['organic'][:self.n_results]
            string = []
            for result in results:
                try:
                    string.append('\n'.join([
                        f"Title: {result['title']}",
                        f"Link: {result['link']}",
                        f"Snippet: {result['snippet']}",
                        "---"
                    ]))
                except KeyError:
                    # Skip entries missing an expected field. The original code
                    # evaluated the bare builtin `next` here — a silent no-op
                    # that only worked by accident; `continue` is what was meant.
                    continue

            content = '\n'.join(string)
            return f"\nSearch results: {content}\n"
        else:
            return results
-------------------------------------------------------------------------------- 1 | # TXTSearchTool 2 | 3 | ## Description 4 | This tool is used to perform a RAG (Retrieval-Augmented Generation) search within the content of a text file. It allows for semantic searching of a query within a specified text file's content, making it an invaluable resource for quickly extracting information or finding specific sections of text based on the query provided. 5 | 6 | ## Installation 7 | To use the TXTSearchTool, you first need to install the praisonai_tools package. This can be done using pip, a package manager for Python. Open your terminal or command prompt and enter the following command: 8 | 9 | ```shell 10 | pip install 'praisonai[tools]' 11 | ``` 12 | 13 | This command will download and install the TXTSearchTool along with any necessary dependencies. 14 | 15 | ## Example 16 | The following example demonstrates how to use the TXTSearchTool to search within a text file. This example shows both the initialization of the tool with a specific text file and the subsequent search within that file's content. 17 | 18 | ```python 19 | from praisonai_tools import TXTSearchTool 20 | 21 | # Initialize the tool to search within any text file's content the agent learns about during its execution 22 | tool = TXTSearchTool() 23 | 24 | # OR 25 | 26 | # Initialize the tool with a specific text file, so the agent can search within the given text file's content 27 | tool = TXTSearchTool(txt='path/to/text/file.txt') 28 | ``` 29 | 30 | ## Arguments 31 | - `txt` (str): **Optinal**. The path to the text file you want to search. This argument is only required if the tool was not initialized with a specific text file; otherwise, the search will be conducted within the initially provided text file. 32 | 33 | ## Custom model and embeddings 34 | 35 | By default, the tool uses OpenAI for both embeddings and summarization. 
from typing import Any, Optional, Type

from embedchain.models.data_type import DataType
from pydantic.v1 import BaseModel, Field

from ..rag.rag_tool import RagTool


class FixedTXTSearchToolSchema(BaseModel):
    """Input for TXTSearchTool when the txt file was fixed at construction."""

    search_query: str = Field(
        ...,
        description="Mandatory search query you want to use to search the txt's content",
    )


class TXTSearchToolSchema(FixedTXTSearchToolSchema):
    """Input for TXTSearchTool when the txt path is supplied per call."""

    txt: str = Field(..., description="Mandatory txt path you want to search")


class TXTSearchTool(RagTool):
    """RAG tool that semantically searches the content of a text file.

    When constructed with a ``txt`` path, that file is indexed up front and
    the argument schema collapses to search_query only; otherwise the path
    is expected as a run-time argument.
    """

    name: str = "Search a txt's content"
    description: str = (
        "A tool that can be used to semantic search a query from a txt's content."
    )
    args_schema: Type[BaseModel] = TXTSearchToolSchema

    def __init__(self, txt: Optional[str] = None, **kwargs):
        super().__init__(**kwargs)
        if txt is not None:
            self.add(txt)
            # Fix: the previous description read "search a query the ... content"
            # (missing "from") — this text is what agents see when choosing tools.
            self.description = f"A tool that can be used to semantic search a query from the {txt} txt's content."
            self.args_schema = FixedTXTSearchToolSchema
            self._generate_description()

    def add(
        self,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """Index a text file; forces the TEXT_FILE data type for embedchain."""
        kwargs["data_type"] = DataType.TEXT_FILE
        super().add(*args, **kwargs)

    def _before_run(
        self,
        query: str,
        **kwargs: Any,
    ) -> Any:
        # Lazily index a txt path supplied at call time before searching.
        if "txt" in kwargs:
            self.add(kwargs["txt"])

    def _run(
        self,
        search_query: str,
        **kwargs: Any,
    ) -> Any:
        return super()._run(query=search_query)
from typing import Any, Optional, Type

from embedchain.models.data_type import DataType
from pydantic.v1 import BaseModel, Field

from ..rag.rag_tool import RagTool


class FixedWebsiteSearchToolSchema(BaseModel):
    """Input for WebsiteSearchTool."""

    search_query: str = Field(
        ...,
        description="Mandatory search query you want to use to search a specific website",
    )


class WebsiteSearchToolSchema(FixedWebsiteSearchToolSchema):
    """Input for WebsiteSearchTool."""

    website: str = Field(
        ..., description="Mandatory valid website URL you want to search on"
    )


class WebsiteSearchTool(RagTool):
    """RAG tool for semantic search over the content of a website."""

    name: str = "Search in a specific website"
    description: str = "A tool that can be used to semantic search a query from a specific URL content."
    args_schema: Type[BaseModel] = WebsiteSearchToolSchema

    def __init__(self, website: Optional[str] = None, **kwargs):
        super().__init__(**kwargs)
        if website is None:
            return
        # A fixed website was given: index it now and narrow the input
        # schema so callers only need to supply a search query.
        self.add(website)
        self.description = f"A tool that can be used to semantic search a query from {website} website content."
        self.args_schema = FixedWebsiteSearchToolSchema
        self._generate_description()

    def add(self, *args: Any, **kwargs: Any) -> None:
        """Index a web page, always tagging it with the WEB_PAGE data type."""
        super().add(*args, **{**kwargs, "data_type": DataType.WEB_PAGE})

    def _before_run(self, query: str, **kwargs: Any) -> Any:
        # Index any website handed to us at call time before searching.
        if "website" in kwargs:
            self.add(kwargs["website"])

    def _run(self, search_query: str, **kwargs: Any) -> Any:
        return super()._run(query=search_query)
The first example shows searching within a specific XML file, while the second example illustrates initiating a search without predefining an XML path, providing flexibility in search scope. 15 | 16 | ```python 17 | from praisonai_tools.tools.xml_search_tool import XMLSearchTool 18 | 19 | # Allow agents to search within any XML file's content as it learns about their paths during execution 20 | tool = XMLSearchTool() 21 | 22 | # OR 23 | 24 | # Initialize the tool with a specific XML file path for exclusive search within that document 25 | tool = XMLSearchTool(xml='path/to/your/xmlfile.xml') 26 | ``` 27 | 28 | ## Arguments 29 | - `xml`: This is the path to the XML file you wish to search. It is an optional parameter during the tool's initialization but must be provided either at initialization or as part of the `run` method's arguments to execute a search. 30 | 31 | ## Custom model and embeddings 32 | 33 | By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows: 34 | 35 | ```python 36 | tool = XMLSearchTool( 37 | config=dict( 38 | llm=dict( 39 | provider="ollama", # or google, openai, anthropic, llama2, ... 
from typing import Any, Optional, Type

from embedchain.models.data_type import DataType
from pydantic.v1 import BaseModel, Field

from ..rag.rag_tool import RagTool


class FixedXMLSearchToolSchema(BaseModel):
    """Input for XMLSearchTool when the XML file was fixed at construction."""

    search_query: str = Field(
        ...,
        description="Mandatory search query you want to use to search the XML's content",
    )


class XMLSearchToolSchema(FixedXMLSearchToolSchema):
    """Input for XMLSearchTool when the XML path is supplied per call."""

    xml: str = Field(..., description="Mandatory xml path you want to search")


class XMLSearchTool(RagTool):
    """RAG tool that semantically searches the content of an XML file.

    When constructed with an ``xml`` path, that file is indexed up front and
    the argument schema collapses to search_query only; otherwise the path
    is expected as a run-time argument.
    """

    name: str = "Search a XML's content"
    description: str = (
        "A tool that can be used to semantic search a query from a XML's content."
    )
    args_schema: Type[BaseModel] = XMLSearchToolSchema

    def __init__(self, xml: Optional[str] = None, **kwargs):
        super().__init__(**kwargs)
        if xml is not None:
            self.add(xml)
            # Fix: the previous description read "search a query the ... content"
            # (missing "from") — this text is what agents see when choosing tools.
            self.description = f"A tool that can be used to semantic search a query from the {xml} XML's content."
            self.args_schema = FixedXMLSearchToolSchema
            self._generate_description()

    def add(
        self,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """Index an XML file; forces the XML data type for embedchain."""
        kwargs["data_type"] = DataType.XML
        super().add(*args, **kwargs)

    def _before_run(
        self,
        query: str,
        **kwargs: Any,
    ) -> Any:
        # Lazily index an xml path supplied at call time before searching.
        if "xml" in kwargs:
            self.add(kwargs["xml"])

    def _run(
        self,
        search_query: str,
        **kwargs: Any,
    ) -> Any:
        return super()._run(query=search_query)
15 | 16 | ```python 17 | from praisonai_tools import YoutubeChannelSearchTool 18 | 19 | # Initialize the tool to search within any Youtube channel's content the agent learns about during its execution 20 | tool = YoutubeChannelSearchTool() 21 | 22 | # OR 23 | 24 | # Initialize the tool with a specific Youtube channel handle to target your search 25 | tool = YoutubeChannelSearchTool(youtube_channel_handle='@exampleChannel') 26 | ``` 27 | 28 | ## Arguments 29 | - `youtube_channel_handle` : A mandatory string representing the Youtube channel handle. This parameter is crucial for initializing the tool to specify the channel you want to search within. The tool is designed to only search within the content of the provided channel handle. 30 | 31 | ## Custom model and embeddings 32 | 33 | By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows: 34 | 35 | ```python 36 | tool = YoutubeChannelSearchTool( 37 | config=dict( 38 | llm=dict( 39 | provider="ollama", # or google, openai, anthropic, llama2, ... 
from typing import Any, Optional, Type

from embedchain.models.data_type import DataType
from pydantic.v1 import BaseModel, Field

from ..rag.rag_tool import RagTool


class FixedYoutubeChannelSearchToolSchema(BaseModel):
    """Input for YoutubeChannelSearchTool when the channel was fixed at construction."""

    search_query: str = Field(
        ...,
        description="Mandatory search query you want to use to search the Youtube Channels content",
    )


class YoutubeChannelSearchToolSchema(FixedYoutubeChannelSearchToolSchema):
    """Input for YoutubeChannelSearchTool when the handle is supplied per call."""

    youtube_channel_handle: str = Field(
        ..., description="Mandatory youtube_channel_handle path you want to search"
    )


class YoutubeChannelSearchTool(RagTool):
    """RAG tool that semantically searches a Youtube channel's content.

    When constructed with a channel handle, that channel is indexed up front
    and the argument schema collapses to search_query only; otherwise the
    handle is expected as a run-time argument.
    """

    name: str = "Search a Youtube Channels content"
    description: str = "A tool that can be used to semantic search a query from a Youtube Channels content."
    args_schema: Type[BaseModel] = YoutubeChannelSearchToolSchema

    def __init__(self, youtube_channel_handle: Optional[str] = None, **kwargs):
        super().__init__(**kwargs)
        if youtube_channel_handle is not None:
            self.add(youtube_channel_handle)
            # Fix: the previous description read "search a query the ... content"
            # (missing "from") — this text is what agents see when choosing tools.
            self.description = f"A tool that can be used to semantic search a query from the {youtube_channel_handle} Youtube Channels content."
            self.args_schema = FixedYoutubeChannelSearchToolSchema
            self._generate_description()

    def add(
        self,
        youtube_channel_handle: str,
        **kwargs: Any,
    ) -> None:
        """Index a channel, normalising the handle to the '@handle' form."""
        if not youtube_channel_handle.startswith("@"):
            youtube_channel_handle = f"@{youtube_channel_handle}"

        kwargs["data_type"] = DataType.YOUTUBE_CHANNEL
        super().add(youtube_channel_handle, **kwargs)

    def _before_run(
        self,
        query: str,
        **kwargs: Any,
    ) -> Any:
        # Lazily index a channel handle supplied at call time before searching.
        if "youtube_channel_handle" in kwargs:
            self.add(kwargs["youtube_channel_handle"])

    def _run(
        self,
        search_query: str,
        **kwargs: Any,
    ) -> Any:
        return super()._run(query=search_query)
Install the package by executing the following command in your terminal: 10 | 11 | ``` 12 | pip install 'praisonai[tools]' 13 | ``` 14 | 15 | ## Example 16 | 17 | To integrate the YoutubeVideoSearchTool into your Python projects, follow the example below. This demonstrates how to use the tool both for general Youtube content searches and for targeted searches within a specific video's content. 18 | 19 | ```python 20 | from praisonai_tools import YoutubeVideoSearchTool 21 | 22 | # General search across Youtube content without specifying a video URL, so the agent can search within any Youtube video content it learns about irs url during its operation 23 | tool = YoutubeVideoSearchTool() 24 | 25 | # Targeted search within a specific Youtube video's content 26 | tool = YoutubeVideoSearchTool(youtube_video_url='https://youtube.com/watch?v=example') 27 | ``` 28 | ## Arguments 29 | 30 | The YoutubeVideoSearchTool accepts the following initialization arguments: 31 | 32 | - `youtube_video_url`: An optional argument at initialization but required if targeting a specific Youtube video. It specifies the Youtube video URL path you want to search within. 33 | 34 | ## Custom model and embeddings 35 | 36 | By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows: 37 | 38 | ```python 39 | tool = YoutubeVideoSearchTool( 40 | config=dict( 41 | llm=dict( 42 | provider="ollama", # or google, openai, anthropic, llama2, ... 
from typing import Any, Optional, Type

from embedchain.models.data_type import DataType
from pydantic.v1 import BaseModel, Field

from ..rag.rag_tool import RagTool


class FixedYoutubeVideoSearchToolSchema(BaseModel):
    """Input for YoutubeVideoSearchTool when the video was fixed at construction."""

    search_query: str = Field(
        ...,
        description="Mandatory search query you want to use to search the Youtube Video content",
    )


class YoutubeVideoSearchToolSchema(FixedYoutubeVideoSearchToolSchema):
    """Input for YoutubeVideoSearchTool when the video URL is supplied per call."""

    youtube_video_url: str = Field(
        ..., description="Mandatory youtube_video_url path you want to search"
    )


class YoutubeVideoSearchTool(RagTool):
    """RAG tool that semantically searches a Youtube video's content.

    When constructed with a video URL, that video is indexed up front and
    the argument schema collapses to search_query only; otherwise the URL
    is expected as a run-time argument.
    """

    name: str = "Search a Youtube Video content"
    description: str = "A tool that can be used to semantic search a query from a Youtube Video content."
    args_schema: Type[BaseModel] = YoutubeVideoSearchToolSchema

    def __init__(self, youtube_video_url: Optional[str] = None, **kwargs):
        super().__init__(**kwargs)
        if youtube_video_url is not None:
            self.add(youtube_video_url)
            # Fix: the previous description read "search a query the ... content"
            # (missing "from") — this text is what agents see when choosing tools.
            self.description = f"A tool that can be used to semantic search a query from the {youtube_video_url} Youtube Video content."
            self.args_schema = FixedYoutubeVideoSearchToolSchema
            self._generate_description()

    def add(
        self,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """Index a Youtube video; forces the YOUTUBE_VIDEO data type."""
        kwargs["data_type"] = DataType.YOUTUBE_VIDEO
        super().add(*args, **kwargs)

    def _before_run(
        self,
        query: str,
        **kwargs: Any,
    ) -> Any:
        # Lazily index a video URL supplied at call time before searching.
        if "youtube_video_url" in kwargs:
            self.add(kwargs["youtube_video_url"])

    def _run(
        self,
        search_query: str,
        **kwargs: Any,
    ) -> Any:
        return super()._run(query=search_query)
"^2.6.1" 45 | langchain = ">=0.3.25,<0.4.0" 46 | pytest = "^8.3.5" 47 | lancedb = "^0.22.0" 48 | openai = "^1.75.0" 49 | embedchain = {extras = ["github", "youtube"], version = ">=0.1.123"} 50 | chromadb = ">=0.5.10,<0.6.0" 51 | pyright = "^1.1.400" 52 | pytube = "^15.0.0" 53 | requests = "^2.32.3" 54 | beautifulsoup4 = "^4.13.4" 55 | selenium = "^4.32.0" 56 | docx2txt = "^0.8" 57 | crewai-tools = "^0.44.0" 58 | docker = "^7.1.0" 59 | crewai = "^0.118.0" 60 | click = "^8.2.0" 61 | 62 | [tool.poetry.urls] 63 | Homepage = "https://docs.praison.ai" 64 | Repository = "https://github.com/mervinpraison/PraisonAI-tools" 65 | 66 | [build-system] 67 | requires = ["hatchling>=1.0.0", "poetry-core>=1.0.0"] 68 | build-backend = "hatchling.build" -------------------------------------------------------------------------------- /tests/base_tool_test.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | from praisonai_tools import BaseTool, tool 3 | 4 | def test_creating_a_tool_using_annotation(): 5 | @tool("Name of my tool") 6 | def my_tool(question: str) -> str: 7 | """Clear description for what this tool is useful for, you agent will need this information to use it.""" 8 | return question 9 | 10 | # Assert all the right attributes were defined 11 | assert my_tool.name == "Name of my tool" 12 | assert my_tool.description == "Name of my tool(question: 'string') - Clear description for what this tool is useful for, you agent will need this information to use it." 13 | assert my_tool.args_schema.schema()["properties"] == {'question': {'title': 'Question', 'type': 'string'}} 14 | assert my_tool.func("What is the meaning of life?") == "What is the meaning of life?" 
15 | 16 | # Assert the langchain tool conversion worked as expected 17 | converted_tool = my_tool.to_langchain() 18 | assert converted_tool.name == "Name of my tool" 19 | assert converted_tool.description == "Name of my tool(question: 'string') - Clear description for what this tool is useful for, you agent will need this information to use it." 20 | assert converted_tool.args_schema.schema()["properties"] == {'question': {'title': 'Question', 'type': 'string'}} 21 | assert converted_tool.func("What is the meaning of life?") == "What is the meaning of life?" 22 | 23 | def test_creating_a_tool_using_baseclass(): 24 | class MyCustomTool(BaseTool): 25 | name: str = "Name of my tool" 26 | description: str = "Clear description for what this tool is useful for, you agent will need this information to use it." 27 | 28 | def _run(self, question: str) -> str: 29 | return question 30 | 31 | my_tool = MyCustomTool() 32 | # Assert all the right attributes were defined 33 | assert my_tool.name == "Name of my tool" 34 | assert my_tool.description == "Name of my tool(question: 'string') - Clear description for what this tool is useful for, you agent will need this information to use it." 35 | assert my_tool.args_schema.schema()["properties"] == {'question': {'title': 'Question', 'type': 'string'}} 36 | assert my_tool.run("What is the meaning of life?") == "What is the meaning of life?" 37 | 38 | # Assert the langchain tool conversion worked as expected 39 | converted_tool = my_tool.to_langchain() 40 | assert converted_tool.name == "Name of my tool" 41 | assert converted_tool.description == "Name of my tool(question: 'string') - Clear description for what this tool is useful for, you agent will need this information to use it." 42 | assert converted_tool.args_schema.schema()["properties"] == {'question': {'title': 'Question', 'type': 'string'}} 43 | assert converted_tool.run("What is the meaning of life?") == "What is the meaning of life?" 
44 | 45 | def test_setting_cache_function(): 46 | class MyCustomTool(BaseTool): 47 | name: str = "Name of my tool" 48 | description: str = "Clear description for what this tool is useful for, you agent will need this information to use it." 49 | cache_function: Callable = lambda: False 50 | 51 | def _run(self, question: str) -> str: 52 | return question 53 | 54 | my_tool = MyCustomTool() 55 | # Assert all the right attributes were defined 56 | assert my_tool.cache_function() == False 57 | 58 | def test_default_cache_function_is_true(): 59 | class MyCustomTool(BaseTool): 60 | name: str = "Name of my tool" 61 | description: str = "Clear description for what this tool is useful for, you agent will need this information to use it." 62 | 63 | def _run(self, question: str) -> str: 64 | return question 65 | 66 | my_tool = MyCustomTool() 67 | # Assert all the right attributes were defined 68 | assert my_tool.cache_function() == True -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from typing import Callable 2 | 3 | import pytest 4 | 5 | 6 | class Helpers: 7 | @staticmethod 8 | def get_embedding_function() -> Callable: 9 | def _func(input): 10 | assert input == ["What are the requirements for the task?"] 11 | with open("tests/data/embedding.txt", "r") as file: 12 | content = file.read() 13 | numbers = content.split(",") 14 | return [[float(number) for number in numbers]] 15 | 16 | return _func 17 | 18 | 19 | @pytest.fixture 20 | def helpers(): 21 | return Helpers 22 | -------------------------------------------------------------------------------- /tests/tools/rag/rag_tool_test.py: -------------------------------------------------------------------------------- 1 | import os 2 | from tempfile import NamedTemporaryFile 3 | from typing import cast 4 | from unittest import mock 5 | 6 | from pytest import fixture 7 | 8 | from 
@fixture(autouse=True)
def mock_embedchain_db_uri():
    """Point EMBEDCHAIN_DB_URI at a throwaway sqlite file for each test.

    autouse=True so no test in this module ever touches a real embedchain DB.
    """
    with NamedTemporaryFile() as tmp:
        uri = f"sqlite:///{tmp.name}"
        with mock.patch.dict(os.environ, {"EMBEDCHAIN_DB_URI": uri}):
            yield


def test_custom_llm_and_embedder():
    """A RagTool subclass should thread a custom llm/embedder config through to embedchain."""
    class MyTool(RagTool):
        pass

    tool = MyTool(
        config=dict(
            llm=dict(
                provider="openai",
                config=dict(model="gpt-3.5-custom"),
            ),
            embedder=dict(
                provider="openai",
                config=dict(model="text-embedding-3-custom"),
            ),
        )
    )
    # The adapter built from the config must be the embedchain-backed one.
    assert tool.adapter is not None
    assert isinstance(tool.adapter, EmbedchainAdapter)

    adapter = cast(EmbedchainAdapter, tool.adapter)
    # Both the summarization LLM and the embedding model must reflect the config.
    assert adapter.embedchain_app.llm.config.model == "gpt-3.5-custom"
    assert (
        adapter.embedchain_app.embedding_model.config.model == "text-embedding-3-custom"
    )