├── .github
    └── workflows
    │   ├── python-package.yml
    │   └── python-publish.yml
├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── README.md
├── poetry.lock
├── praisonai_tools
    ├── __init__.py
    ├── adapters
    │   ├── embedchain_adapter.py
    │   └── lancedb_adapter.py
    └── tools
    │   ├── __init__.py
    │   ├── base_tool.py
    │   ├── browserbase_load_tool
    │       ├── README.md
    │       └── browserbase_load_tool.py
    │   ├── code_docs_search_tool
    │       ├── README.md
    │       └── code_docs_search_tool.py
    │   ├── csv_search_tool
    │       ├── README.md
    │       └── csv_search_tool.py
    │   ├── directory_read_tool
    │       ├── README.md
    │       └── directory_read_tool.py
    │   ├── directory_search_tool
    │       ├── README.md
    │       └── directory_search_tool.py
    │   ├── docx_search_tool
    │       ├── README.md
    │       └── docx_search_tool.py
    │   ├── exa_tools
    │       ├── README.md
    │       ├── exa_base_tool.py
    │       └── exa_search_tool.py
    │   ├── file_read_tool
    │       ├── README.md
    │       └── file_read_tool.py
    │   ├── github_search_tool
    │       ├── README.md
    │       └── github_search_tool.py
    │   ├── json_search_tool
    │       ├── README.md
    │       └── json_search_tool.py
    │   ├── llamaindex_tool
    │       ├── README.md
    │       └── llamaindex_tool.py
    │   ├── mdx_seach_tool
    │       ├── README.md
    │       └── mdx_search_tool.py
    │   ├── pdf_search_tool
    │       ├── README.md
    │       └── pdf_search_tool.py
    │   ├── pdf_text_writing_tool
    │       └── pdf_text_writing_tool.py
    │   ├── pg_seach_tool
    │       ├── README.md
    │       └── pg_search_tool.py
    │   ├── rag
    │       ├── README.md
    │       ├── __init__.py
    │       └── rag_tool.py
    │   ├── scrape_element_from_website
    │       └── scrape_element_from_website.py
    │   ├── scrape_website_tool
    │       ├── README.md
    │       └── scrape_website_tool.py
    │   ├── selenium_scraping_tool
    │       ├── README.md
    │       └── selenium_scraping_tool.py
    │   ├── serper_dev_tool
    │       ├── README.md
    │       └── serper_dev_tool.py
    │   ├── txt_search_tool
    │       ├── README.md
    │       └── txt_search_tool.py
    │   ├── website_search
    │       ├── README.md
    │       └── website_search_tool.py
    │   ├── xml_search_tool
    │       ├── README.md
    │       └── xml_search_tool.py
    │   ├── youtube_channel_search_tool
    │       ├── README.md
    │       └── youtube_channel_search_tool.py
    │   └── youtube_video_search_tool
    │       ├── README.md
    │       └── youtube_video_search_tool.py
├── pyproject.toml
├── tests
    ├── base_tool_test.py
    ├── conftest.py
    └── tools
    │   └── rag
    │       └── rag_tool_test.py
└── uv.lock


/.github/workflows/python-package.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
 3 | 
 4 | name: Python package
 5 | 
 6 | on:
 7 |   push:
 8 |     branches: [ "develop" ]
 9 |   pull_request:
10 |     branches: [ "develop" ]
11 | 
12 | jobs:
13 |   build:
14 | 
15 |     runs-on: ubuntu-latest
16 |     strategy:
17 |       fail-fast: false
18 |       matrix:
19 |         python-version: ["3.10", "3.11"]
20 | 
21 |     steps:
22 |     - uses: actions/checkout@v3
23 |     - name: Set up Python ${{ matrix.python-version }}
24 |       uses: actions/setup-python@v3
25 |       with:
26 |         python-version: ${{ matrix.python-version }}
27 |     - name: Install dependencies
28 |       run: |
29 |         python -m pip install --upgrade pip
30 |         python -m pip install flake8 pytest
31 |         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi


--------------------------------------------------------------------------------
/.github/workflows/python-publish.yml:
--------------------------------------------------------------------------------
 1 | # This workflow will upload a Python Package using Twine when a release is created
 2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries
 3 | 
 4 | # This workflow uses actions that are not certified by GitHub.
 5 | # They are provided by a third-party and are governed by
 6 | # separate terms of service, privacy policy, and support
 7 | # documentation.
 8 | 
 9 | name: Upload Python Package
10 | 
11 | on:
12 |   release:
13 |     types: [published]
14 | 
15 | permissions:
16 |   contents: read
17 | 
18 | jobs:
19 |   deploy:
20 | 
21 |     runs-on: ubuntu-latest
22 | 
23 |     steps:
24 |     - uses: actions/checkout@v3
25 |     - name: Set up Python
26 |       uses: actions/setup-python@v3
27 |       with:
28 |         python-version: '3.x'
29 |     - name: Install dependencies
30 |       run: |
31 |         python -m pip install --upgrade pip
32 |         pip install build
33 |     - name: Build package
34 |       run: python -m build
35 |     - name: Publish package
36 |       uses: pypa/gh-action-pypi-publish@v1.9.0
37 |       with:
38 |         user: __token__
39 |         password: ${{ secrets.PYPI_API_TOKEN }}


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | venv
 2 | .DS_Store
 3 | .pytest_cache
 4 | __pycache__
 5 | dist/
 6 | .env
 7 | .idea
 8 | test.py
 9 | chroma.sqlite3
10 | .vscode
11 | praisonai
12 | .cache
13 | __pycache__
14 | chroma.sqlite3
15 | test/
16 | .env
17 | assets/*
18 | .idea
19 | .DS_Store
20 | .pytest_cache
21 | praisonAI.egg-info
22 | flagged
23 | test.yaml
24 | db
25 | 


--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
 1 | repos:
 2 | 
 3 |   - repo: https://github.com/psf/black-pre-commit-mirror
 4 |     rev: 23.12.1
 5 |     hooks:
 6 |       - id: black
 7 |         language_version: python3.11
 8 |         files: \.(py)$
 9 | 
10 |   - repo: https://github.com/pycqa/isort
11 |     rev: 5.13.2
12 |     hooks:
13 |       - id: isort
14 |         name: isort (python)
15 |         args: ["--profile", "black", "--filter-files"]
16 | 
17 |   - repo: https://github.com/PyCQA/autoflake
18 |     rev: v2.2.1
19 |     hooks:
20 |       - id: autoflake
21 |         args: ['--in-place', '--remove-all-unused-imports', '--remove-unused-variables', '--ignore-init-module-imports']
22 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 João Moura
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Praison AI Tools


--------------------------------------------------------------------------------
/praisonai_tools/__init__.py:
--------------------------------------------------------------------------------
 1 | from crewai_tools import BaseTool, Tool, tool
 2 | from crewai_tools import (
 3 | 	BrowserbaseLoadTool,
 4 | 	CodeDocsSearchTool,
 5 | 	CSVSearchTool,
 6 | 	DirectorySearchTool,
 7 | 	DOCXSearchTool,
 8 | 	DirectoryReadTool,
 9 |  	EXASearchTool,
10 | 	FileReadTool,
11 | 	GithubSearchTool,
12 | 	SerperDevTool,
13 | 	TXTSearchTool,
14 | 	JSONSearchTool,
15 | 	MDXSearchTool,
16 | 	PDFSearchTool,
17 | 	PGSearchTool,
18 | 	RagTool,
19 | 	ScrapeElementFromWebsiteTool,
20 | 	ScrapeWebsiteTool,
21 | 	SeleniumScrapingTool,
22 | 	WebsiteSearchTool,
23 | 	XMLSearchTool,
24 | 	YoutubeChannelSearchTool,
25 | 	YoutubeVideoSearchTool,
26 | 	LlamaIndexTool
27 | )


--------------------------------------------------------------------------------
/praisonai_tools/adapters/embedchain_adapter.py:
--------------------------------------------------------------------------------
 1 | from typing import Any
 2 | 
 3 | from embedchain import App
 4 | 
 5 | from praisonai_tools.tools.rag.rag_tool import Adapter
 6 | 
 7 | 
 8 | class EmbedchainAdapter(Adapter):
 9 |     embedchain_app: App
10 |     summarize: bool = False
11 | 
12 |     def query(self, question: str) -> str:
13 |         result, sources = self.embedchain_app.query(
14 |             question, citations=True, dry_run=(not self.summarize)
15 |         )
16 |         if self.summarize:
17 |             return result
18 |         return "\n\n".join([source[0] for source in sources])
19 | 
20 |     def add(
21 |         self,
22 |         *args: Any,
23 |         **kwargs: Any,
24 |     ) -> None:
25 |         self.embedchain_app.add(*args, **kwargs)
26 | 


--------------------------------------------------------------------------------
/praisonai_tools/adapters/lancedb_adapter.py:
--------------------------------------------------------------------------------
 1 | from pathlib import Path
 2 | from typing import Any, Callable
 3 | 
 4 | from lancedb import DBConnection as LanceDBConnection
 5 | from lancedb import connect as lancedb_connect
 6 | from lancedb.table import Table as LanceDBTable
 7 | from openai import Client as OpenAIClient
 8 | from pydantic import Field, PrivateAttr
 9 | 
10 | from praisonai_tools.tools.rag.rag_tool import Adapter
11 | 
12 | 
13 | def _default_embedding_function():
14 |     client = OpenAIClient()
15 | 
16 |     def _embedding_function(input):
17 |         rs = client.embeddings.create(input=input, model="text-embedding-ada-002")
18 |         return [record.embedding for record in rs.data]
19 | 
20 |     return _embedding_function
21 | 
22 | 
23 | class LanceDBAdapter(Adapter):
24 |     uri: str | Path
25 |     table_name: str
26 |     embedding_function: Callable = Field(default_factory=_default_embedding_function)
27 |     top_k: int = 3
28 |     vector_column_name: str = "vector"
29 |     text_column_name: str = "text"
30 | 
31 |     _db: LanceDBConnection = PrivateAttr()
32 |     _table: LanceDBTable = PrivateAttr()
33 | 
34 |     def model_post_init(self, __context: Any) -> None:
35 |         self._db = lancedb_connect(self.uri)
36 |         self._table = self._db.open_table(self.table_name)
37 | 
38 |         super().model_post_init(__context)
39 | 
40 |     def query(self, question: str) -> str:
41 |         query = self.embedding_function([question])[0]
42 |         results = (
43 |             self._table.search(query, vector_column_name=self.vector_column_name)
44 |             .limit(self.top_k)
45 |             .select([self.text_column_name])
46 |             .to_list()
47 |         )
48 |         values = [result[self.text_column_name] for result in results]
49 |         return "\n".join(values)
50 | 
51 |     def add(
52 |         self,
53 |         *args: Any,
54 |         **kwargs: Any,
55 |     ) -> None:
56 |         self._table.add(*args, **kwargs)
57 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/__init__.py:
--------------------------------------------------------------------------------
 1 | from .browserbase_load_tool.browserbase_load_tool import BrowserbaseLoadTool
 2 | from .code_docs_search_tool.code_docs_search_tool import CodeDocsSearchTool
 3 | from .csv_search_tool.csv_search_tool import CSVSearchTool
 4 | from .directory_search_tool.directory_search_tool import DirectorySearchTool
 5 | from .directory_read_tool.directory_read_tool import DirectoryReadTool
 6 | from .docx_search_tool.docx_search_tool import DOCXSearchTool
 7 | from .exa_tools.exa_search_tool import EXASearchTool
 8 | from .file_read_tool.file_read_tool import FileReadTool
 9 | from .github_search_tool.github_search_tool import GithubSearchTool
10 | from .serper_dev_tool.serper_dev_tool import SerperDevTool
11 | from .txt_search_tool.txt_search_tool import TXTSearchTool
12 | from .json_search_tool.json_search_tool import JSONSearchTool
13 | from .mdx_seach_tool.mdx_search_tool import MDXSearchTool
14 | from .pdf_search_tool.pdf_search_tool import PDFSearchTool
15 | from .pg_seach_tool.pg_search_tool import PGSearchTool
16 | from .rag.rag_tool import RagTool
17 | from .scrape_element_from_website.scrape_element_from_website import ScrapeElementFromWebsiteTool
18 | from .scrape_website_tool.scrape_website_tool import ScrapeWebsiteTool
19 | from .selenium_scraping_tool.selenium_scraping_tool import SeleniumScrapingTool
20 | from .website_search.website_search_tool import WebsiteSearchTool
21 | from .xml_search_tool.xml_search_tool import XMLSearchTool
22 | from .youtube_channel_search_tool.youtube_channel_search_tool import YoutubeChannelSearchTool
23 | from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool
24 | from .llamaindex_tool.llamaindex_tool import LlamaIndexTool
25 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/base_tool.py:
--------------------------------------------------------------------------------
  1 | from abc import ABC, abstractmethod
  2 | from typing import Any, Callable, Optional, Type
  3 | 
  4 | from langchain_core.tools import StructuredTool
  5 | from pydantic import BaseModel, ConfigDict, Field, validator
  6 | from pydantic.v1 import BaseModel as V1BaseModel
  7 | 
  8 | 
  9 | class BaseTool(BaseModel, ABC):
 10 |     class _ArgsSchemaPlaceholder(V1BaseModel):
 11 |         pass
 12 | 
 13 |     model_config = ConfigDict()
 14 | 
 15 |     name: str
 16 |     """The unique name of the tool that clearly communicates its purpose."""
 17 |     description: str
 18 |     """Used to tell the model how/when/why to use the tool."""
 19 |     args_schema: Type[V1BaseModel] = Field(default_factory=_ArgsSchemaPlaceholder)
 20 |     """The schema for the arguments that the tool accepts."""
 21 |     description_updated: bool = False
 22 |     """Flag to check if the description has been updated."""
 23 |     cache_function: Optional[Callable] = lambda _args, _result: True
 24 |     """Function that will be used to determine if the tool should be cached, should return a boolean. If None, the tool will be cached."""
 25 | 
 26 |     @validator("args_schema", always=True, pre=True)
 27 |     def _default_args_schema(cls, v: Type[V1BaseModel]) -> Type[V1BaseModel]:
 28 |         if not isinstance(v, cls._ArgsSchemaPlaceholder):
 29 |             return v
 30 | 
 31 |         return type(
 32 |             f"{cls.__name__}Schema",
 33 |             (V1BaseModel,),
 34 |             {
 35 |                 "__annotations__": {
 36 |                     k: v for k, v in cls._run.__annotations__.items() if k != "return"
 37 |                 },
 38 |             },
 39 |         )
 40 | 
 41 |     def model_post_init(self, __context: Any) -> None:
 42 |         self._generate_description()
 43 | 
 44 |         super().model_post_init(__context)
 45 | 
 46 |     def run(
 47 |         self,
 48 |         *args: Any,
 49 |         **kwargs: Any,
 50 |     ) -> Any:
 51 |         print(f"Using Tool: {self.name}")
 52 |         return self._run(*args, **kwargs)
 53 | 
 54 |     @abstractmethod
 55 |     def _run(
 56 |         self,
 57 |         *args: Any,
 58 |         **kwargs: Any,
 59 |     ) -> Any:
 60 |         """Here goes the actual implementation of the tool."""
 61 | 
 62 |     def to_langchain(self) -> StructuredTool:
 63 |         self._set_args_schema()
 64 |         return StructuredTool(
 65 |             name=self.name,
 66 |             description=self.description,
 67 |             args_schema=self.args_schema,
 68 |             func=self._run,
 69 |         )
 70 | 
 71 |     def _set_args_schema(self):
 72 |         if self.args_schema is None:
 73 |             class_name = f"{self.__class__.__name__}Schema"
 74 |             self.args_schema = type(
 75 |                 class_name,
 76 |                 (V1BaseModel,),
 77 |                 {
 78 |                     "__annotations__": {
 79 |                         k: v
 80 |                         for k, v in self._run.__annotations__.items()
 81 |                         if k != "return"
 82 |                     },
 83 |                 },
 84 |             )
 85 | 
 86 |     def _generate_description(self):
 87 |         args = []
 88 |         for arg, attribute in self.args_schema.schema()["properties"].items():
 89 |             if "type" in attribute:
 90 |                 args.append(f"{arg}: '{attribute['type']}'")
 91 | 
 92 |         description = self.description.replace("\n", " ")
 93 |         self.description = f"{self.name}({', '.join(args)}) - {description}"
 94 | 
 95 | 
 96 | class Tool(BaseTool):
 97 |     func: Callable
 98 |     """The function that will be executed when the tool is called."""
 99 | 
100 |     def _run(self, *args: Any, **kwargs: Any) -> Any:
101 |         return self.func(*args, **kwargs)
102 | 
103 | 
104 | def to_langchain(
105 |     tools: list[BaseTool | StructuredTool],
106 | ) -> list[StructuredTool]:
107 |     return [t.to_langchain() if isinstance(t, BaseTool) else t for t in tools]
108 | 
109 | 
110 | def tool(*args):
111 |     """
112 |     Decorator to create a tool from a function.
113 |     """
114 | 
115 |     def _make_with_name(tool_name: str) -> Callable:
116 |         def _make_tool(f: Callable) -> BaseTool:
117 |             if f.__doc__ is None:
118 |                 raise ValueError("Function must have a docstring")
119 |             if f.__annotations__ is None:
120 |                 raise ValueError("Function must have type annotations")
121 | 
122 |             class_name = "".join(tool_name.split()).title()
123 |             args_schema = type(
124 |                 class_name,
125 |                 (V1BaseModel,),
126 |                 {
127 |                     "__annotations__": {
128 |                         k: v for k, v in f.__annotations__.items() if k != "return"
129 |                     },
130 |                 },
131 |             )
132 | 
133 |             return Tool(
134 |                 name=tool_name,
135 |                 description=f.__doc__,
136 |                 func=f,
137 |                 args_schema=args_schema,
138 |             )
139 | 
140 |         return _make_tool
141 | 
142 |     if len(args) == 1 and callable(args[0]):
143 |         return _make_with_name(args[0].__name__)(args[0])
144 |     if len(args) == 1 and isinstance(args[0], str):
145 |         return _make_with_name(args[0])
146 |     raise ValueError("Invalid arguments")
147 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/browserbase_load_tool/README.md:
--------------------------------------------------------------------------------
 1 | # BrowserbaseLoadTool
 2 | 
 3 | ## Description
 4 | 
 5 | [Browserbase](https://browserbase.com) is a serverless platform for running headless browsers, it offers advanced debugging, session recordings, stealth mode, integrated proxies and captcha solving.
 6 | 
 7 | ## Installation
 8 | 
 9 | - Get an API key from [browserbase.com](https://browserbase.com) and set it in environment variables (`BROWSERBASE_API_KEY`).
10 | - Install the [Browserbase SDK](http://github.com/browserbase/python-sdk) along with `praisonai[tools]` package:
11 | 
12 | ```
13 | pip install browserbase 'praisonai[tools]'
14 | ```
15 | 
16 | ## Example
17 | 
18 | Utilize the BrowserbaseLoadTool as follows to allow your agent to load websites:
19 | 
20 | ```python
21 | from praisonai_tools import BrowserbaseLoadTool
22 | 
23 | tool = BrowserbaseLoadTool()
24 | ```
25 | 
26 | ## Arguments
27 | 
28 | - `api_key`: Optional. Specifies Browserbase API key. Defaults is the `BROWSERBASE_API_KEY` environment variable.
29 | - `text_content`: Optional. Load pages as readable text. Default is `False`.
30 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/browserbase_load_tool/browserbase_load_tool.py:
--------------------------------------------------------------------------------
 1 | from typing import Optional, Any, Type
 2 | from pydantic.v1 import BaseModel, Field
 3 | from praisonai_tools.tools.base_tool import BaseTool
 4 | 
 5 | class BrowserbaseLoadToolSchema(BaseModel):
 6 |     url: str = Field(description="Website URL")
 7 | 
 8 | class BrowserbaseLoadTool(BaseTool):
 9 |     name: str = "Browserbase web load tool"
10 |     description: str = "Load webpages url in a headless browser using Browserbase and return the contents"
11 |     args_schema: Type[BaseModel] = BrowserbaseLoadToolSchema
12 |     api_key: Optional[str] = None
13 |     text_content: Optional[bool] = False
14 |     browserbase: Optional[Any] = None
15 | 
16 |     def __init__(self, api_key: Optional[str] = None, text_content: Optional[bool] = False, **kwargs):
17 |         super().__init__(**kwargs)
18 |         try:
19 |             from browserbase import Browserbase # type: ignore
20 |         except ImportError:
21 |            raise ImportError(
22 |                "`browserbase` package not found, please run `pip install browserbase`"
23 |            )
24 | 
25 |         self.browserbase = Browserbase(api_key=api_key)
26 |         self.text_content = text_content
27 | 
28 |     def _run(self, url: str):
29 |         return self.browserbase.load_url(url, text_content=self.text_content)
30 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/code_docs_search_tool/README.md:
--------------------------------------------------------------------------------
 1 | # CodeDocsSearchTool
 2 | 
 3 | ## Description
 4 | The CodeDocsSearchTool is a powerful RAG (Retrieval-Augmented Generation) tool designed for semantic searches within code documentation. It enables users to efficiently find specific information or topics within code documentation. By providing a `docs_url` during initialization, the tool narrows down the search to that particular documentation site. Alternatively, without a specific `docs_url`, it searches across a wide array of code documentation known or discovered throughout its execution, making it versatile for various documentation search needs.
 5 | 
 6 | ## Installation
 7 | To start using the CodeDocsSearchTool, first, install the praisonai_tools package via pip:
 8 | ```shell
 9 | pip install 'praisonai[tools]'
10 | ```
11 | 
12 | ## Example
13 | Utilize the CodeDocsSearchTool as follows to conduct searches within code documentation:
14 | ```python
15 | from praisonai_tools import CodeDocsSearchTool
16 | 
17 | # To search any code documentation content if the URL is known or discovered during its execution:
18 | tool = CodeDocsSearchTool()
19 | 
20 | # OR
21 | 
22 | # To specifically focus your search on a given documentation site by providing its URL:
23 | tool = CodeDocsSearchTool(docs_url='https://docs.example.com/reference')
24 | ```
25 | Note: Substitute 'https://docs.example.com/reference' with your target documentation URL and 'How to use search tool' with the search query relevant to your needs.
26 | 
27 | ## Arguments
28 | - `docs_url`: Optional. Specifies the URL of the code documentation to be searched. Providing this during the tool's initialization focuses the search on the specified documentation content.
29 | 
30 | ## Custom model and embeddings
31 | 
32 | By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
33 | 
34 | ```python
35 | tool = YoutubeVideoSearchTool(
36 |     config=dict(
37 |         llm=dict(
38 |             provider="ollama", # or google, openai, anthropic, llama2, ...
39 |             config=dict(
40 |                 model="llama2",
41 |                 # temperature=0.5,
42 |                 # top_p=1,
43 |                 # stream=true,
44 |             ),
45 |         ),
46 |         embedder=dict(
47 |             provider="google",
48 |             config=dict(
49 |                 model="models/embedding-001",
50 |                 task_type="retrieval_document",
51 |                 # title="Embeddings",
52 |             ),
53 |         ),
54 |     )
55 | )
56 | ```
57 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/code_docs_search_tool/code_docs_search_tool.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Optional, Type
 2 | 
 3 | from embedchain.models.data_type import DataType
 4 | from pydantic.v1 import BaseModel, Field
 5 | 
 6 | from ..rag.rag_tool import RagTool
 7 | 
 8 | 
 9 | class FixedCodeDocsSearchToolSchema(BaseModel):
10 |     """Input for CodeDocsSearchTool."""
11 | 
12 |     search_query: str = Field(
13 |         ...,
14 |         description="Mandatory search query you want to use to search the Code Docs content",
15 |     )
16 | 
17 | 
18 | class CodeDocsSearchToolSchema(FixedCodeDocsSearchToolSchema):
19 |     """Input for CodeDocsSearchTool."""
20 | 
21 |     docs_url: str = Field(..., description="Mandatory docs_url path you want to search")
22 | 
23 | 
24 | class CodeDocsSearchTool(RagTool):
25 |     name: str = "Search a Code Docs content"
26 |     description: str = (
27 |         "A tool that can be used to semantic search a query from a Code Docs content."
28 |     )
29 |     args_schema: Type[BaseModel] = CodeDocsSearchToolSchema
30 | 
31 |     def __init__(self, docs_url: Optional[str] = None, **kwargs):
32 |         super().__init__(**kwargs)
33 |         if docs_url is not None:
34 |             self.add(docs_url)
35 |             self.description = f"A tool that can be used to semantic search a query the {docs_url} Code Docs content."
36 |             self.args_schema = FixedCodeDocsSearchToolSchema
37 |             self._generate_description()
38 | 
39 |     def add(
40 |         self,
41 |         *args: Any,
42 |         **kwargs: Any,
43 |     ) -> None:
44 |         kwargs["data_type"] = DataType.DOCS_SITE
45 |         super().add(*args, **kwargs)
46 | 
47 |     def _before_run(
48 |         self,
49 |         query: str,
50 |         **kwargs: Any,
51 |     ) -> Any:
52 |         if "docs_url" in kwargs:
53 |             self.add(kwargs["docs_url"])
54 | 
55 |     def _run(
56 |         self,
57 |         search_query: str,
58 |         **kwargs: Any,
59 |     ) -> Any:
60 |         return super()._run(query=search_query)
61 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/csv_search_tool/README.md:
--------------------------------------------------------------------------------
 1 | # CSVSearchTool
 2 | 
 3 | ## Description
 4 | 
 5 | This tool is used to perform a RAG (Retrieval-Augmented Generation) search within a CSV file's content. It allows users to semantically search for queries in the content of a specified CSV file. This feature is particularly useful for extracting information from large CSV datasets where traditional search methods might be inefficient. All tools with "Search" in their name, including CSVSearchTool, are RAG tools designed for searching different sources of data.
 6 | 
 7 | ## Installation
 8 | 
 9 | Install the praisonai_tools package
10 | 
11 | ```shell
12 | pip install 'praisonai[tools]'
13 | ```
14 | 
15 | ## Example
16 | 
17 | ```python
18 | from praisonai_tools import CSVSearchTool
19 | 
20 | # Initialize the tool with a specific CSV file. This setup allows the agent to only search the given CSV file.
21 | tool = CSVSearchTool(csv='path/to/your/csvfile.csv')
22 | 
23 | # OR
24 | 
25 | # Initialize the tool without a specific CSV file. Agent  will need to provide the CSV path at runtime.
26 | tool = CSVSearchTool()
27 | ```
28 | 
29 | ## Arguments
30 | 
31 | - `csv` : The path to the CSV file you want to search. This is a mandatory argument if the tool was initialized without a specific CSV file; otherwise, it is optional.
32 | 
33 | ## Custom model and embeddings
34 | 
35 | By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
36 | 
37 | ```python
38 | tool = CSVSearchTool(
39 |     config=dict(
40 |         llm=dict(
41 |             provider="ollama", # or google, openai, anthropic, llama2, ...
42 |             config=dict(
43 |                 model="llama2",
44 |                 # temperature=0.5,
45 |                 # top_p=1,
46 |                 # stream=true,
47 |             ),
48 |         ),
49 |         embedder=dict(
50 |             provider="google",
51 |             config=dict(
52 |                 model="models/embedding-001",
53 |                 task_type="retrieval_document",
54 |                 # title="Embeddings",
55 |             ),
56 |         ),
57 |     )
58 | )
59 | ```
60 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/csv_search_tool/csv_search_tool.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Optional, Type
 2 | 
 3 | from embedchain.models.data_type import DataType
 4 | from pydantic.v1 import BaseModel, Field
 5 | 
 6 | from ..rag.rag_tool import RagTool
 7 | 
 8 | 
 9 | class FixedCSVSearchToolSchema(BaseModel):
10 |     """Input for CSVSearchTool."""
11 | 
12 |     search_query: str = Field(
13 |         ...,
14 |         description="Mandatory search query you want to use to search the CSV's content",
15 |     )
16 | 
17 | 
18 | class CSVSearchToolSchema(FixedCSVSearchToolSchema):
19 |     """Input for CSVSearchTool."""
20 | 
21 |     csv: str = Field(..., description="Mandatory csv path you want to search")
22 | 
23 | 
24 | class CSVSearchTool(RagTool):
25 |     name: str = "Search a CSV's content"
26 |     description: str = (
27 |         "A tool that can be used to semantic search a query from a CSV's content."
28 |     )
29 |     args_schema: Type[BaseModel] = CSVSearchToolSchema
30 | 
31 |     def __init__(self, csv: Optional[str] = None, **kwargs):
32 |         super().__init__(**kwargs)
33 |         if csv is not None:
34 |             self.add(csv)
35 |             self.description = f"A tool that can be used to semantic search a query the {csv} CSV's content."
36 |             self.args_schema = FixedCSVSearchToolSchema
37 |             self._generate_description()
38 | 
39 |     def add(
40 |         self,
41 |         *args: Any,
42 |         **kwargs: Any,
43 |     ) -> None:
44 |         kwargs["data_type"] = DataType.CSV
45 |         super().add(*args, **kwargs)
46 | 
47 |     def _before_run(
48 |         self,
49 |         query: str,
50 |         **kwargs: Any,
51 |     ) -> Any:
52 |         if "csv" in kwargs:
53 |             self.add(kwargs["csv"])
54 | 
55 |     def _run(
56 |         self,
57 |         search_query: str,
58 |         **kwargs: Any,
59 |     ) -> Any:
60 |         return super()._run(query=search_query)
61 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/directory_read_tool/README.md:
--------------------------------------------------------------------------------
 1 | ```markdown
 2 | # DirectoryReadTool
 3 | 
 4 | ## Description
 5 | The DirectoryReadTool is a highly efficient utility designed for the comprehensive listing of directory contents. It recursively navigates through the specified directory, providing users with a detailed enumeration of all files, including those nested within subdirectories. This tool is indispensable for tasks requiring a thorough inventory of directory structures or for validating the organization of files within directories.
 6 | 
 7 | ## Installation
 8 | Install the `praisonai_tools` package to use the DirectoryReadTool in your project. If you haven't added this package to your environment, you can easily install it with pip using the following command:
 9 | 
10 | ```shell
11 | pip install 'praisonai[tools]'
12 | ```
13 | 
14 | This installs the latest version of the `praisonai_tools` package, allowing access to the DirectoryReadTool and other utilities.
15 | 
16 | ## Example
17 | The DirectoryReadTool is simple to use. The code snippet below shows how to set up and use the tool to list the contents of a specified directory:
18 | 
19 | ```python
20 | from praisonai_tools import DirectoryReadTool
21 | 
22 | # Initialize the tool with the directory you want to explore
23 | tool = DirectoryReadTool(directory='/path/to/your/directory')
24 | 
25 | # Use the tool to list the contents of the specified directory
26 | directory_contents = tool.run()
27 | print(directory_contents)
28 | ```
29 | 
30 | This example demonstrates the essential steps to utilize the DirectoryReadTool effectively, highlighting its simplicity and user-friendly design.
31 | 
32 | ## Arguments
33 | The DirectoryReadTool requires minimal configuration for use. The essential argument for this tool is as follows:
34 | 
35 | - `directory`: A mandatory argument that specifies the path to the directory whose contents you wish to list. It accepts both absolute and relative paths, guiding the tool to the desired directory for content listing.
36 | 
37 | The DirectoryReadTool provides a user-friendly and efficient way to list directory contents, making it an invaluable tool for managing and inspecting directory structures.
38 | ```
39 | 
40 | This revised documentation for the DirectoryReadTool maintains the structure and content requirements as outlined, with adjustments made for clarity, consistency, and adherence to the high-quality standards exemplified in the provided documentation example.
41 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/directory_read_tool/directory_read_tool.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from typing import Optional, Type, Any
 3 | from pydantic.v1 import BaseModel, Field
 4 | from ..base_tool import BaseTool
 5 | 
 6 | class FixedDirectoryReadToolSchema(BaseModel):
 7 | 	"""Input for DirectoryReadTool."""
 8 | 	pass
 9 | 
10 | class DirectoryReadToolSchema(FixedDirectoryReadToolSchema):
11 | 	"""Input for DirectoryReadTool."""
12 | 	directory: str = Field(..., description="Mandatory directory to list content")
13 | 
14 | class DirectoryReadTool(BaseTool):
15 | 	name: str = "List files in directory"
16 | 	description: str = "A tool that can be used to recursively list a directory's content."
17 | 	args_schema: Type[BaseModel] = DirectoryReadToolSchema
18 | 	directory: Optional[str] = None
19 | 
20 | 	def __init__(self, directory: Optional[str] = None, **kwargs):
21 | 		super().__init__(**kwargs)
22 | 		if directory is not None:
23 | 			self.directory = directory
24 | 			self.description = f"A tool that can be used to list {directory}'s content."
25 | 			self.args_schema = FixedDirectoryReadToolSchema
26 | 			self._generate_description()
27 | 
28 | 	def _run(
29 | 		self,
30 | 		**kwargs: Any,
31 | 	) -> Any:
32 | 		directory = kwargs.get('directory', self.directory)
33 | 		if directory[-1] == "/":
34 | 			directory = directory[:-1]
35 | 		files_list = [f"{directory}/{(os.path.join(root, filename).replace(directory, '').lstrip(os.path.sep))}" for root, dirs, files in os.walk(directory) for filename in files]
36 | 		files = "\n- ".join(files_list)
37 | 		return f"File paths: \n-{files}"
38 | 
39 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/directory_search_tool/README.md:
--------------------------------------------------------------------------------
 1 | # DirectorySearchTool
 2 | 
 3 | ## Description
 4 | This tool is designed to perform a semantic search for queries within the content of a specified directory. Utilizing the RAG (Retrieval-Augmented Generation) methodology, it offers a powerful means to semantically navigate through the files of a given directory. The tool can be dynamically set to search any directory specified at runtime or can be pre-configured to search within a specific directory upon initialization.
 5 | 
 6 | ## Installation
 7 | To start using the DirectorySearchTool, you need to install the praisonai_tools package. Execute the following command in your terminal:
 8 | 
 9 | ```shell
10 | pip install 'praisonai[tools]'
11 | ```
12 | 
13 | ## Example
14 | The following examples demonstrate how to initialize the DirectorySearchTool for different use cases and how to perform a search:
15 | 
16 | ```python
17 | from praisonai_tools import DirectorySearchTool
18 | 
19 | # To enable searching within any specified directory at runtime
20 | tool = DirectorySearchTool()
21 | 
22 | # Alternatively, to restrict searches to a specific directory
23 | tool = DirectorySearchTool(directory='/path/to/directory')
24 | ```
25 | 
26 | ## Arguments
27 | - `directory` : This string argument specifies the directory within which to search. It is mandatory if the tool has not been initialized with a directory; otherwise, the tool will only search within the initialized directory.
28 | 
29 | ## Custom model and embeddings
30 | 
31 | By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
32 | 
33 | ```python
34 | tool = DirectorySearchTool(
35 |     config=dict(
36 |         llm=dict(
37 |             provider="ollama", # or google, openai, anthropic, llama2, ...
38 |             config=dict(
39 |                 model="llama2",
40 |                 # temperature=0.5,
41 |                 # top_p=1,
42 |                 # stream=true,
43 |             ),
44 |         ),
45 |         embedder=dict(
46 |             provider="google",
47 |             config=dict(
48 |                 model="models/embedding-001",
49 |                 task_type="retrieval_document",
50 |                 # title="Embeddings",
51 |             ),
52 |         ),
53 |     )
54 | )
55 | ```
56 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/directory_search_tool/directory_search_tool.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Optional, Type
 2 | 
 3 | from embedchain.loaders.directory_loader import DirectoryLoader
 4 | from pydantic.v1 import BaseModel, Field
 5 | 
 6 | from ..rag.rag_tool import RagTool
 7 | 
 8 | 
 9 | class FixedDirectorySearchToolSchema(BaseModel):
10 |     """Input for DirectorySearchTool."""
11 | 
12 |     search_query: str = Field(
13 |         ...,
14 |         description="Mandatory search query you want to use to search the directory's content",
15 |     )
16 | 
17 | 
18 | class DirectorySearchToolSchema(FixedDirectorySearchToolSchema):
19 |     """Input for DirectorySearchTool."""
20 | 
21 |     directory: str = Field(..., description="Mandatory directory you want to search")
22 | 
23 | 
24 | class DirectorySearchTool(RagTool):
25 |     name: str = "Search a directory's content"
26 |     description: str = (
27 |         "A tool that can be used to semantic search a query from a directory's content."
28 |     )
29 |     args_schema: Type[BaseModel] = DirectorySearchToolSchema
30 | 
31 |     def __init__(self, directory: Optional[str] = None, **kwargs):
32 |         super().__init__(**kwargs)
33 |         if directory is not None:
34 |             self.add(directory)
35 |             self.description = f"A tool that can be used to semantic search a query the {directory} directory's content."
36 |             self.args_schema = FixedDirectorySearchToolSchema
37 |             self._generate_description()
38 | 
39 |     def add(
40 |         self,
41 |         *args: Any,
42 |         **kwargs: Any,
43 |     ) -> None:
44 |         kwargs["loader"] = DirectoryLoader(config=dict(recursive=True))
45 |         super().add(*args, **kwargs)
46 | 
47 |     def _before_run(
48 |         self,
49 |         query: str,
50 |         **kwargs: Any,
51 |     ) -> Any:
52 |         if "directory" in kwargs:
53 |             self.add(kwargs["directory"])
54 | 
55 |     def _run(
56 |         self,
57 |         search_query: str,
58 |         **kwargs: Any,
59 |     ) -> Any:
60 |         return super()._run(query=search_query)
61 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/docx_search_tool/README.md:
--------------------------------------------------------------------------------
 1 | # DOCXSearchTool
 2 | 
 3 | ## Description
 4 | The DOCXSearchTool is a RAG tool designed for semantic searching within DOCX documents. It enables users to effectively search and extract relevant information from DOCX files using query-based searches. This tool is invaluable for data analysis, information management, and research tasks, streamlining the process of finding specific information within large document collections.
 5 | 
 6 | ## Installation
 7 | Install the praisonai_tools package by running the following command in your terminal:
 8 | 
 9 | ```shell
10 | pip install 'praisonai[tools]'
11 | ```
12 | 
13 | ## Example
14 | The following example demonstrates initializing the DOCXSearchTool to search within any DOCX file's content or with a specific DOCX file path.
15 | 
16 | ```python
17 | from praisonai_tools import DOCXSearchTool
18 | 
19 | # Initialize the tool to search within any DOCX file's content
20 | tool = DOCXSearchTool()
21 | 
22 | # OR
23 | 
24 | # Initialize the tool with a specific DOCX file, so the agent can only search the content of the specified DOCX file
25 | tool = DOCXSearchTool(docx='path/to/your/document.docx')
26 | ```
27 | 
28 | ## Arguments
29 | - `docx`: An optional file path to a specific DOCX document you wish to search. If not provided during initialization, the tool allows for later specification of any DOCX file's content path for searching.
30 | 
31 | ## Custom model and embeddings
32 | 
33 | By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
34 | 
35 | ```python
36 | tool = DOCXSearchTool(
37 |     config=dict(
38 |         llm=dict(
39 |             provider="ollama", # or google, openai, anthropic, llama2, ...
40 |             config=dict(
41 |                 model="llama2",
42 |                 # temperature=0.5,
43 |                 # top_p=1,
44 |                 # stream=true,
45 |             ),
46 |         ),
47 |         embedder=dict(
48 |             provider="google",
49 |             config=dict(
50 |                 model="models/embedding-001",
51 |                 task_type="retrieval_document",
52 |                 # title="Embeddings",
53 |             ),
54 |         ),
55 |     )
56 | )
57 | ```
58 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/docx_search_tool/docx_search_tool.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Optional, Type
 2 | 
 3 | from embedchain.models.data_type import DataType
 4 | from pydantic.v1 import BaseModel, Field
 5 | 
 6 | from ..rag.rag_tool import RagTool
 7 | 
 8 | 
 9 | class FixedDOCXSearchToolSchema(BaseModel):
10 |     """Input for DOCXSearchTool."""
11 |     docx: Optional[str] = Field(..., description="Mandatory docx path you want to search")
12 |     search_query: str = Field(
13 |         ...,
14 |         description="Mandatory search query you want to use to search the DOCX's content",
15 |     )
16 | 
17 | class DOCXSearchToolSchema(FixedDOCXSearchToolSchema):
18 |     """Input for DOCXSearchTool."""
19 |     search_query: str = Field(
20 |         ...,
21 |         description="Mandatory search query you want to use to search the DOCX's content",
22 |     )
23 | 
24 | class DOCXSearchTool(RagTool):
25 |     name: str = "Search a DOCX's content"
26 |     description: str = (
27 |         "A tool that can be used to semantic search a query from a DOCX's content."
28 |     )
29 |     args_schema: Type[BaseModel] = DOCXSearchToolSchema
30 | 
31 |     def __init__(self, docx: Optional[str] = None, **kwargs):
32 |         super().__init__(**kwargs)
33 |         if docx is not None:
34 |             self.add(docx)
35 |             self.description = f"A tool that can be used to semantic search a query the {docx} DOCX's content."
36 |             self.args_schema = FixedDOCXSearchToolSchema
37 |             self._generate_description()
38 | 
39 |     def add(
40 |         self,
41 |         *args: Any,
42 |         **kwargs: Any,
43 |     ) -> None:
44 |         kwargs["data_type"] = DataType.DOCX
45 |         super().add(*args, **kwargs)
46 | 
47 |     def _before_run(
48 |         self,
49 |         query: str,
50 |         **kwargs: Any,
51 |     ) -> Any:
52 |         if "docx" in kwargs:
53 |             self.add(kwargs["docx"])
54 | 
55 |     def _run(
56 |         self,
57 |         **kwargs: Any,
58 |     ) -> Any:
59 |         search_query = kwargs.get('search_query')
60 |         if search_query is None:
61 |             search_query = kwargs.get('query')
62 | 
63 |         docx = kwargs.get("docx")
64 |         if docx is not None:
65 |             self.add(docx)
66 |         return super()._run(query=search_query)
67 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/exa_tools/README.md:
--------------------------------------------------------------------------------
 1 | # EXASearchTool Documentation
 2 | 
 3 | ## Description
 4 | This tool is designed to perform a semantic search for a specified query from a text's content across the internet. It utilizes the `https://exa.ai/` API to fetch and display the most relevant search results based on the query provided by the user.
 5 | 
 6 | ## Installation
 7 | To incorporate this tool into your project, follow the installation instructions below:
 8 | ```shell
 9 | pip install 'praisonai[tools]'
10 | ```
11 | 
12 | ## Example
13 | The following example demonstrates how to initialize the tool and execute a search with a given query:
14 | 
15 | ```python
16 | from praisonai_tools import EXASearchTool
17 | 
18 | # Initialize the tool for internet searching capabilities
19 | tool = EXASearchTool()
20 | ```
21 | 
22 | ## Steps to Get Started
23 | To effectively use the `EXASearchTool`, follow these steps:
24 | 
25 | 1. **Package Installation**: Confirm that the `praisonai[tools]` package is installed in your Python environment.
26 | 2. **API Key Acquisition**: Acquire a `https://exa.ai/` API key by registering for a free account at `https://exa.ai/`.
27 | 3. **Environment Configuration**: Store your obtained API key in an environment variable named `EXA_API_KEY` to facilitate its use by the tool.
28 | 
29 | ## Conclusion
30 | By integrating the `EXASearchTool` into Python projects, users gain the ability to conduct real-time, relevant searches across the internet directly from their applications. By adhering to the setup and usage guidelines provided, incorporating this tool into projects is streamlined and straightforward.
31 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/exa_tools/exa_base_tool.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from typing import Type
 3 | from pydantic.v1 import BaseModel, Field
 4 | from praisonai_tools.tools.base_tool import BaseTool
 5 | 
 6 | class EXABaseToolToolSchema(BaseModel):
 7 | 	"""Input for EXABaseTool."""
 8 | 	search_query: str = Field(..., description="Mandatory search query you want to use to search the internet")
 9 | 
10 | class EXABaseTool(BaseTool):
11 | 	name: str = "Search the internet"
12 | 	description: str = "A tool that can be used to search the internet from a search_query"
13 | 	args_schema: Type[BaseModel] = EXABaseToolToolSchema
14 | 	search_url: str = "https://api.exa.ai/search"
15 | 	n_results: int = None
16 | 	headers: dict = {
17 | 			"accept": "application/json",
18 | 			"content-type": "application/json",
19 | 		}
20 | 
21 | 	def _parse_results(self, results):
22 | 		stirng = []
23 | 		for result in results:
24 | 			try:
25 | 				stirng.append('\n'.join([
26 | 						f"Title: {result['title']}",
27 | 						f"Score: {result['score']}",
28 | 						f"Url: {result['url']}",
29 | 						f"ID: {result['id']}",
30 | 						"---"
31 | 				]))
32 | 			except KeyError:
33 | 				next
34 | 
35 | 		content = '\n'.join(stirng)
36 | 		return f"\nSearch results: {content}\n"
37 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/exa_tools/exa_search_tool.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import requests
 3 | from typing import Any
 4 | 
 5 | from .exa_base_tool import EXABaseTool
 6 | 
 7 | class EXASearchTool(EXABaseTool):
 8 |   def _run(
 9 |     self,
10 |     **kwargs: Any,
11 |   ) -> Any:
12 |     search_query = kwargs.get('search_query')
13 |     if search_query is None:
14 |       search_query = kwargs.get('query')
15 | 
16 |     payload = {
17 |         "query": search_query,
18 |         "type": "magic",
19 |     }
20 | 
21 |     headers = self.headers.copy()
22 |     headers["x-api-key"] = os.environ['EXA_API_KEY']
23 | 
24 |     response = requests.post(self.search_url, json=payload, headers=headers)
25 |     results = response.json()
26 |     if 'results' in results:
27 |       results = super()._parse_results(results['results'])
28 |     return results
29 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/file_read_tool/README.md:
--------------------------------------------------------------------------------
 1 | # FileReadTool
 2 | 
 3 | ## Description
 4 | The FileReadTool is a versatile component of the praisonai_tools package, designed to streamline the process of reading and retrieving content from files. It is particularly useful in scenarios such as batch text file processing, runtime configuration file reading, and data importation for analytics. This tool supports various text-based file formats including `.txt`, `.csv`, `.json`, and adapts its functionality based on the file type, for instance, converting JSON content into a Python dictionary for easy use.
 5 | 
 6 | ## Installation
 7 | Install the praisonai_tools package to use the FileReadTool in your projects:
 8 | 
 9 | ```shell
10 | pip install 'praisonai[tools]'
11 | ```
12 | 
13 | ## Example
14 | To get started with the FileReadTool:
15 | 
16 | ```python
17 | from praisonai_tools import FileReadTool
18 | 
19 | # Initialize the tool to read any files the agents knows or lean the path for
20 | file_read_tool = FileReadTool()
21 | 
22 | # OR
23 | 
24 | # Initialize the tool with a specific file path, so the agent can only read the content of the specified file
25 | file_read_tool = FileReadTool(file_path='path/to/your/file.txt')
26 | ```
27 | 
28 | ## Arguments
29 | - `file_path`: The path to the file you want to read. It accepts both absolute and relative paths. Ensure the file exists and you have the necessary permissions to access it.


--------------------------------------------------------------------------------
/praisonai_tools/tools/file_read_tool/file_read_tool.py:
--------------------------------------------------------------------------------
 1 | from typing import Optional, Type, Any
 2 | from pydantic.v1 import BaseModel, Field
 3 | from ..base_tool import BaseTool
 4 | 
 5 | 
 6 | class FixedFileReadToolSchema(BaseModel):
 7 |     """Input for FileReadTool."""
 8 |     pass
 9 | 
10 | 
11 | class FileReadToolSchema(FixedFileReadToolSchema):
12 |     """Input for FileReadTool."""
13 |     file_path: str = Field(
14 |         ...,
15 |         description="Mandatory file full path to read the file"
16 |     )
17 | 
18 | 
19 | class FileReadTool(BaseTool):
20 |     name: str = "Read a file's content"
21 |     description: str = "A tool that can be used to read a file's content."
22 |     args_schema: Type[BaseModel] = FileReadToolSchema
23 |     file_path: Optional[str] = None
24 | 
25 |     def __init__(
26 |         self,
27 |         file_path: Optional[str] = None,
28 |         **kwargs
29 |     ):
30 |         super().__init__(**kwargs)
31 |         if file_path is not None:
32 |             self.file_path = file_path
33 |             self.description = f"A tool that can be used to read {file_path}'s content."
34 |             self.args_schema = FixedFileReadToolSchema
35 |             self._generate_description()
36 | 
37 |     def _run(
38 |         self,
39 |         **kwargs: Any,
40 |     ) -> Any:
41 |         try:
42 |             file_path = kwargs.get('file_path', self.file_path)
43 |             with open(file_path, 'r') as file:
44 |                 return file.read()
45 |         except Exception as e:
46 |             return f"Fail to read the file {file_path}. Error: {e}"
47 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/github_search_tool/README.md:
--------------------------------------------------------------------------------
 1 | # GithubSearchTool
 2 | 
 3 | ## Description
 4 | The GithubSearchTool is a Read, Append, and Generate (RAG) tool specifically designed for conducting semantic searches within GitHub repositories. Utilizing advanced semantic search capabilities, it sifts through code, pull requests, issues, and repositories, making it an essential tool for developers, researchers, or anyone in need of precise information from GitHub.
 5 | 
 6 | ## Installation
 7 | To use the GithubSearchTool, first ensure the praisonai_tools package is installed in your Python environment:
 8 | 
 9 | ```shell
10 | pip install 'praisonai[tools]'
11 | ```
12 | 
13 | This command installs the necessary package to run the GithubSearchTool along with any other tools included in the praisonai_tools package.
14 | 
15 | ## Example
16 | Here’s how you can use the GithubSearchTool to perform semantic searches within a GitHub repository:
17 | ```python
18 | from praisonai_tools import GithubSearchTool
19 | 
20 | # Initialize the tool for semantic searches within a specific GitHub repository
21 | tool = GithubSearchTool(
22 |     gh_token='...',
23 | 	github_repo='https://github.com/example/repo',
24 | 	content_types=['code', 'issue'] # Options: code, repo, pr, issue
25 | )
26 | 
27 | # OR
28 | 
29 | # Initialize the tool for semantic searches within a specific GitHub repository, so the agent can search any repository if it learns about during its execution
30 | tool = GithubSearchTool(
31 |     gh_token='...',
32 | 	content_types=['code', 'issue'] # Options: code, repo, pr, issue
33 | )
34 | ```
35 | 
36 | ## Arguments
37 | - `gh_token` : The GitHub token used to authenticate the search. This is a mandatory field and allows the tool to access the GitHub API for conducting searches.
38 | - `github_repo` : The URL of the GitHub repository where the search will be conducted. This is a mandatory field and specifies the target repository for your search.
39 | - `content_types` : Specifies the types of content to include in your search. You must provide a list of content types from the following options: `code` for searching within the code, `repo` for searching within the repository's general information, `pr` for searching within pull requests, and `issue` for searching within issues. This field is mandatory and allows tailoring the search to specific content types within the GitHub repository.
40 | 
41 | ## Custom model and embeddings
42 | 
43 | By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
44 | 
45 | ```python
46 | tool = GithubSearchTool(
47 |     config=dict(
48 |         llm=dict(
49 |             provider="ollama", # or google, openai, anthropic, llama2, ...
50 |             config=dict(
51 |                 model="llama2",
52 |                 # temperature=0.5,
53 |                 # top_p=1,
54 |                 # stream=true,
55 |             ),
56 |         ),
57 |         embedder=dict(
58 |             provider="google",
59 |             config=dict(
60 |                 model="models/embedding-001",
61 |                 task_type="retrieval_document",
62 |                 # title="Embeddings",
63 |             ),
64 |         ),
65 |     )
66 | )
67 | ```
68 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/github_search_tool/github_search_tool.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, List, Optional, Type
 2 | 
 3 | from embedchain.loaders.github import GithubLoader
 4 | from pydantic.v1 import BaseModel, Field
 5 | 
 6 | from ..rag.rag_tool import RagTool
 7 | 
 8 | 
 9 | class FixedGithubSearchToolSchema(BaseModel):
10 |     """Input for GithubSearchTool."""
11 | 
12 |     search_query: str = Field(
13 |         ...,
14 |         description="Mandatory search query you want to use to search the github repo's content",
15 |     )
16 | 
17 | 
18 | class GithubSearchToolSchema(FixedGithubSearchToolSchema):
19 |     """Input for GithubSearchTool."""
20 | 
21 |     github_repo: str = Field(..., description="Mandatory github you want to search")
22 |     content_types: List[str] = Field(
23 |         ...,
24 |         description="Mandatory content types you want to be included search, options: [code, repo, pr, issue]",
25 |     )
26 | 
27 | 
28 | class GithubSearchTool(RagTool):
29 |     name: str = "Search a github repo's content"
30 |     description: str = "A tool that can be used to semantic search a query from a github repo's content. This is not the GitHub API, but instead a tool that can provide semantic search capabilities."
31 |     summarize: bool = False
32 |     gh_token: str
33 |     args_schema: Type[BaseModel] = GithubSearchToolSchema
34 |     content_types: List[str]
35 | 
36 |     def __init__(self, github_repo: Optional[str] = None, **kwargs):
37 |         super().__init__(**kwargs)
38 |         if github_repo is not None:
39 |             self.add(repo=github_repo)
40 |             self.description = f"A tool that can be used to semantic search a query the {github_repo} github repo's content. This is not the GitHub API, but instead a tool that can provide semantic search capabilities."
41 |             self.args_schema = FixedGithubSearchToolSchema
42 |             self._generate_description()
43 | 
44 |     def add(
45 |         self,
46 |         repo: str,
47 |         content_types: List[str] | None = None,
48 |         **kwargs: Any,
49 |     ) -> None:
50 |         content_types = content_types or self.content_types
51 | 
52 |         kwargs["data_type"] = "github"
53 |         kwargs["loader"] = GithubLoader(config={"token": self.gh_token})
54 |         super().add(f"repo:{repo} type:{','.join(content_types)}", **kwargs)
55 | 
56 |     def _before_run(
57 |         self,
58 |         query: str,
59 |         **kwargs: Any,
60 |     ) -> Any:
61 |         if "github_repo" in kwargs:
62 |             self.add(
63 |                 repo=kwargs["github_repo"], content_types=kwargs.get("content_types")
64 |             )
65 | 
66 |     def _run(
67 |         self,
68 |         search_query: str,
69 |         **kwargs: Any,
70 |     ) -> Any:
71 |         return super()._run(query=search_query)
72 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/json_search_tool/README.md:
--------------------------------------------------------------------------------
 1 | # JSONSearchTool
 2 | 
 3 | ## Description
 4 | This tool is used to perform a RAG search within a JSON file's content. It allows users to initiate a search with a specific JSON path, focusing the search operation within that particular JSON file. If the path is provided at initialization, the tool restricts its search scope to the specified JSON file, thereby enhancing the precision of search results.
 5 | 
 6 | ## Installation
 7 | Install the praisonai_tools package by executing the following command in your terminal:
 8 | 
 9 | ```shell
10 | pip install 'praisonai[tools]'
11 | ```
12 | 
13 | ## Example
14 | Below are examples demonstrating how to use the JSONSearchTool for searching within JSON files. You can either search any JSON content or restrict the search to a specific JSON file.
15 | 
16 | ```python
17 | from praisonai_tools import JSONSearchTool
18 | 
19 | # Example 1: Initialize the tool for a general search across any JSON content. This is useful when the path is known or can be discovered during execution.
20 | tool = JSONSearchTool()
21 | 
22 | # Example 2: Initialize the tool with a specific JSON path, limiting the search to a particular JSON file.
23 | tool = JSONSearchTool(json_path='./path/to/your/file.json')
24 | ```
25 | 
26 | ## Arguments
27 | - `json_path` (str): An optional argument that defines the path to the JSON file to be searched. This parameter is only necessary if the tool is initialized without a specific JSON path. Providing this argument restricts the search to the specified JSON file.
28 | 
29 | ## Custom model and embeddings
30 | 
31 | By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
32 | 
33 | ```python
34 | tool = JSONSearchTool(
35 |     config=dict(
36 |         llm=dict(
37 |             provider="ollama", # or google, openai, anthropic, llama2, ...
38 |             config=dict(
39 |                 model="llama2",
40 |                 # temperature=0.5,
41 |                 # top_p=1,
42 |                 # stream=true,
43 |             ),
44 |         ),
45 |         embedder=dict(
46 |             provider="google",
47 |             config=dict(
48 |                 model="models/embedding-001",
49 |                 task_type="retrieval_document",
50 |                 # title="Embeddings",
51 |             ),
52 |         ),
53 |     )
54 | )
55 | ```
56 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/json_search_tool/json_search_tool.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Optional, Type
 2 | 
 3 | from embedchain.models.data_type import DataType
 4 | from pydantic.v1 import BaseModel, Field
 5 | 
 6 | from ..rag.rag_tool import RagTool
 7 | 
 8 | 
 9 | class FixedJSONSearchToolSchema(BaseModel):
10 |     """Input for JSONSearchTool."""
11 | 
12 |     search_query: str = Field(
13 |         ...,
14 |         description="Mandatory search query you want to use to search the JSON's content",
15 |     )
16 | 
17 | 
18 | class JSONSearchToolSchema(FixedJSONSearchToolSchema):
19 |     """Input for JSONSearchTool."""
20 | 
21 |     json_path: str = Field(..., description="Mandatory json path you want to search")
22 | 
23 | 
24 | class JSONSearchTool(RagTool):
25 |     name: str = "Search a JSON's content"
26 |     description: str = (
27 |         "A tool that can be used to semantic search a query from a JSON's content."
28 |     )
29 |     args_schema: Type[BaseModel] = JSONSearchToolSchema
30 | 
31 |     def __init__(self, json_path: Optional[str] = None, **kwargs):
32 |         super().__init__(**kwargs)
33 |         if json_path is not None:
34 |             self.add(json_path)
35 |             self.description = f"A tool that can be used to semantic search a query the {json_path} JSON's content."
36 |             self.args_schema = FixedJSONSearchToolSchema
37 |             self._generate_description()
38 | 
39 |     def add(
40 |         self,
41 |         *args: Any,
42 |         **kwargs: Any,
43 |     ) -> None:
44 |         kwargs["data_type"] = DataType.JSON
45 |         super().add(*args, **kwargs)
46 | 
47 |     def _before_run(
48 |         self,
49 |         query: str,
50 |         **kwargs: Any,
51 |     ) -> Any:
52 |         if "json_path" in kwargs:
53 |             self.add(kwargs["json_path"])
54 | 
55 |     def _run(
56 |         self,
57 |         search_query: str,
58 |         **kwargs: Any,
59 |     ) -> Any:
60 |         return super()._run(query=search_query)
61 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/llamaindex_tool/README.md:
--------------------------------------------------------------------------------
 1 | # LlamaIndexTool Documentation
 2 | 
 3 | ## Description
 4 | This tool is designed to be a general wrapper around LlamaIndex tools and query engines, enabling you to leverage LlamaIndex resources
 5 | in terms of RAG/agentic pipelines as tools to plug into praisonai agents.
 6 | 
 7 | ## Installation
 8 | To incorporate this tool into your project, follow the installation instructions below:
 9 | ```shell
10 | pip install 'praisonai[tools]'
11 | ```
12 | 
13 | ## Example
14 | The following example demonstrates how to initialize the tool and execute a search with a given query:
15 | 
16 | ```python
17 | from praisonai_tools import LlamaIndexTool
18 | 
19 | # Initialize the tool from a LlamaIndex Tool
20 | 
21 | ## Example 1: Initialize from FunctionTool
22 | from llama_index.core.tools import FunctionTool
23 | 
24 | your_python_function = lambda ...: ...
25 | og_tool = FunctionTool.from_defaults(your_python_function, name="<name>", description='<description>')
26 | tool = LlamaIndexTool.from_tool(og_tool)
27 | 
28 | ## Example 2: Initialize from LlamaHub Tools
29 | from llama_index.tools.wolfram_alpha import WolframAlphaToolSpec
30 | wolfram_spec = WolframAlphaToolSpec(app_id="<app_id>")
31 | wolfram_tools = wolfram_spec.to_tool_list()
32 | tools = [LlamaIndexTool.from_tool(t) for t in wolfram_tools]
33 | 
34 | 
35 | # Initialize Tool from a LlamaIndex Query Engine
36 | 
37 | ## NOTE: LlamaIndex has a lot of query engines, define whatever query engine you want
38 | query_engine = index.as_query_engine() 
39 | query_tool = LlamaIndexTool.from_query_engine(
40 |     query_engine,
41 |     name="Uber 2019 10K Query Tool",
42 |     description="Use this tool to lookup the 2019 Uber 10K Annual Report"
43 | )
44 | 
45 | ```
46 | 
47 | ## Steps to Get Started
48 | To effectively use the `LlamaIndexTool`, follow these steps:
49 | 
50 | 1. **Install praisonai**: Confirm that the `praisonai[tools]` package is installed in your Python environment.
51 | 2. **Install and use LlamaIndex**: Follow LlamaIndex documentation (https://docs.llamaindex.ai/) to setup a RAG/agent pipeline.
52 | 
53 | 
54 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/llamaindex_tool/llamaindex_tool.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import json
 3 | import requests
 4 | 
 5 | from typing import Type, Any, cast, Optional
 6 | from pydantic.v1 import BaseModel, Field
 7 | from praisonai_tools.tools.base_tool import BaseTool
 8 | 
 9 | class LlamaIndexTool(BaseTool):
10 |     """Tool to wrap LlamaIndex tools/query engines."""
11 |     llama_index_tool: Any
12 | 
13 |     def _run(
14 | 		self,
15 |         *args: Any,
16 | 		**kwargs: Any,
17 | 	) -> Any:
18 |         """Run tool."""
19 |         from llama_index.core.tools import BaseTool as LlamaBaseTool
20 |         tool = cast(LlamaBaseTool, self.llama_index_tool)
21 |         return tool(*args, **kwargs)
22 | 	
23 |     @classmethod
24 |     def from_tool(
25 |         cls,
26 |         tool: Any,
27 |         **kwargs: Any
28 |     ) -> "LlamaIndexTool":
29 |         from llama_index.core.tools import BaseTool as LlamaBaseTool
30 |         
31 |         if not isinstance(tool, LlamaBaseTool):
32 |             raise ValueError(f"Expected a LlamaBaseTool, got {type(tool)}")
33 |         tool = cast(LlamaBaseTool, tool)
34 | 
35 |         if tool.metadata.fn_schema is None:
36 |             raise ValueError("The LlamaIndex tool does not have an fn_schema specified.")
37 |         args_schema = cast(Type[BaseModel], tool.metadata.fn_schema)
38 |         
39 |         return cls(
40 |             name=tool.metadata.name,
41 |             description=tool.metadata.description,
42 |             args_schema=args_schema,
43 |             llama_index_tool=tool,
44 |             **kwargs
45 |         )
46 | 
47 | 
48 |     @classmethod
49 |     def from_query_engine(
50 |         cls,
51 |         query_engine: Any,
52 |         name: Optional[str] = None,
53 |         description: Optional[str] = None,
54 |         return_direct: bool = False,
55 |         **kwargs: Any
56 |     ) -> "LlamaIndexTool":
57 |         from llama_index.core.query_engine import BaseQueryEngine
58 |         from llama_index.core.tools import QueryEngineTool
59 | 
60 |         if not isinstance(query_engine, BaseQueryEngine):
61 |             raise ValueError(f"Expected a BaseQueryEngine, got {type(query_engine)}")
62 | 
63 |         # NOTE: by default the schema expects an `input` variable. However this 
64 |         # confuses praisonai so we are renaming to `query`.
65 |         class QueryToolSchema(BaseModel):
66 |             """Schema for query tool."""
67 |             query: str = Field(..., description="Search query for the query tool.")
68 | 
69 |         # NOTE: setting `resolve_input_errors` to True is important because the schema expects `input` but we are using `query`
70 |         query_engine_tool = QueryEngineTool.from_defaults(
71 |             query_engine,
72 |             name=name,
73 |             description=description,
74 |             return_direct=return_direct,
75 |             resolve_input_errors=True,  
76 |         )
77 |         # HACK: we are replacing the schema with our custom schema
78 |         query_engine_tool.metadata.fn_schema = QueryToolSchema
79 |         
80 |         return cls.from_tool(
81 |             query_engine_tool,
82 |             **kwargs
83 |         )
84 |         


--------------------------------------------------------------------------------
/praisonai_tools/tools/mdx_seach_tool/README.md:
--------------------------------------------------------------------------------
 1 | # MDXSearchTool
 2 | 
 3 | ## Description
 4 | The MDX Search Tool, a key component of the `praisonai_tools` package, is designed for advanced market data extraction, offering invaluable support to researchers and analysts requiring immediate market insights in the AI sector. With its ability to interface with various data sources and tools, it streamlines the process of acquiring, reading, and organizing market data efficiently.
 5 | 
 6 | ## Installation
 7 | To utilize the MDX Search Tool, ensure the `praisonai_tools` package is installed. If not already present, install it using the following command:
 8 | 
 9 | ```shell
10 | pip install 'praisonai[tools]'
11 | ```
12 | 
13 | ## Example
14 | Configuring and using the MDX Search Tool involves setting up environment variables and utilizing the tool within a praisonai project for market research. Here's a simple example:
15 | 
16 | ```python
17 | from praisonai_tools import MDXSearchTool
18 | 
19 | # Initialize the tool so the agent can search any MDX content if it learns about during its execution
20 | tool = MDXSearchTool()
21 | 
22 | # OR
23 | 
24 | # Initialize the tool with a specific MDX file path for exclusive search within that document
25 | tool = MDXSearchTool(mdx='path/to/your/document.mdx')
26 | ```
27 | 
28 | ## Arguments
29 | - mdx: **Optional** The MDX path for the search. Can be provided at initialization
30 | 
31 | ## Custom model and embeddings
32 | 
33 | By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
34 | 
35 | ```python
36 | tool = MDXSearchTool(
37 |     config=dict(
38 |         llm=dict(
39 |             provider="ollama", # or google, openai, anthropic, llama2, ...
40 |             config=dict(
41 |                 model="llama2",
42 |                 # temperature=0.5,
43 |                 # top_p=1,
44 |                 # stream=true,
45 |             ),
46 |         ),
47 |         embedder=dict(
48 |             provider="google",
49 |             config=dict(
50 |                 model="models/embedding-001",
51 |                 task_type="retrieval_document",
52 |                 # title="Embeddings",
53 |             ),
54 |         ),
55 |     )
56 | )
57 | ```
58 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/mdx_seach_tool/mdx_search_tool.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Optional, Type
 2 | 
 3 | from embedchain.models.data_type import DataType
 4 | from pydantic.v1 import BaseModel, Field
 5 | 
 6 | from ..rag.rag_tool import RagTool
 7 | 
 8 | 
 9 | class FixedMDXSearchToolSchema(BaseModel):
10 |     """Input for MDXSearchTool."""
11 | 
12 |     search_query: str = Field(
13 |         ...,
14 |         description="Mandatory search query you want to use to search the MDX's content",
15 |     )
16 | 
17 | 
18 | class MDXSearchToolSchema(FixedMDXSearchToolSchema):
19 |     """Input for MDXSearchTool."""
20 | 
21 |     mdx: str = Field(..., description="Mandatory mdx path you want to search")
22 | 
23 | 
24 | class MDXSearchTool(RagTool):
25 |     name: str = "Search a MDX's content"
26 |     description: str = (
27 |         "A tool that can be used to semantic search a query from a MDX's content."
28 |     )
29 |     args_schema: Type[BaseModel] = MDXSearchToolSchema
30 | 
31 |     def __init__(self, mdx: Optional[str] = None, **kwargs):
32 |         super().__init__(**kwargs)
33 |         if mdx is not None:
34 |             self.add(mdx)
35 |             self.description = f"A tool that can be used to semantic search a query the {mdx} MDX's content."
36 |             self.args_schema = FixedMDXSearchToolSchema
37 |             self._generate_description()
38 | 
39 |     def add(
40 |         self,
41 |         *args: Any,
42 |         **kwargs: Any,
43 |     ) -> None:
44 |         kwargs["data_type"] = DataType.MDX
45 |         super().add(*args, **kwargs)
46 | 
47 |     def _before_run(
48 |         self,
49 |         query: str,
50 |         **kwargs: Any,
51 |     ) -> Any:
52 |         if "mdx" in kwargs:
53 |             self.add(kwargs["mdx"])
54 | 
55 |     def _run(
56 |         self,
57 |         search_query: str,
58 |         **kwargs: Any,
59 |     ) -> Any:
60 |         return super()._run(query=search_query)
61 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/pdf_search_tool/README.md:
--------------------------------------------------------------------------------
 1 | # PDFSearchTool
 2 | 
 3 | ## Description
 4 | The PDFSearchTool is a RAG tool designed for semantic searches within PDF content. It allows for inputting a search query and a PDF document, leveraging advanced search techniques to find relevant content efficiently. This capability makes it especially useful for extracting specific information from large PDF files quickly.
 5 | 
 6 | ## Installation
 7 | To get started with the PDFSearchTool, first, ensure the praisonai_tools package is installed with the following command:
 8 | 
 9 | ```shell
10 | pip install 'praisonai[tools]'
11 | ```
12 | 
13 | ## Example
14 | Here's how to use the PDFSearchTool to search within a PDF document:
15 | 
16 | ```python
17 | from praisonai_tools import PDFSearchTool
18 | 
19 | # Initialize the tool allowing for any PDF content search if the path is provided during execution
20 | tool = PDFSearchTool()
21 | 
22 | # OR
23 | 
24 | # Initialize the tool with a specific PDF path for exclusive search within that document
25 | tool = PDFSearchTool(pdf='path/to/your/document.pdf')
26 | ```
27 | 
28 | ## Arguments
29 | - `pdf`: **Optinal** The PDF path for the search. Can be provided at initialization or within the `run` method's arguments. If provided at initialization, the tool confines its search to the specified document.
30 | 
31 | ## Custom model and embeddings
32 | 
33 | By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
34 | 
35 | ```python
36 | tool = PDFSearchTool(
37 |     config=dict(
38 |         llm=dict(
39 |             provider="ollama", # or google, openai, anthropic, llama2, ...
40 |             config=dict(
41 |                 model="llama2",
42 |                 # temperature=0.5,
43 |                 # top_p=1,
44 |                 # stream=true,
45 |             ),
46 |         ),
47 |         embedder=dict(
48 |             provider="google",
49 |             config=dict(
50 |                 model="models/embedding-001",
51 |                 task_type="retrieval_document",
52 |                 # title="Embeddings",
53 |             ),
54 |         ),
55 |     )
56 | )
57 | ```
58 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/pdf_search_tool/pdf_search_tool.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Optional, Type
 2 | 
 3 | from embedchain.models.data_type import DataType
 4 | from pydantic.v1 import BaseModel, Field
 5 | 
 6 | from ..rag.rag_tool import RagTool
 7 | 
 8 | 
 9 | class FixedPDFSearchToolSchema(BaseModel):
10 |     """Input for PDFSearchTool."""
11 | 
12 |     query: str = Field(
13 |         ..., description="Mandatory query you want to use to search the PDF's content"
14 |     )
15 | 
16 | 
17 | class PDFSearchToolSchema(FixedPDFSearchToolSchema):
18 |     """Input for PDFSearchTool."""
19 | 
20 |     pdf: str = Field(..., description="Mandatory pdf path you want to search")
21 | 
22 | 
23 | class PDFSearchTool(RagTool):
24 |     name: str = "Search a PDF's content"
25 |     description: str = (
26 |         "A tool that can be used to semantic search a query from a PDF's content."
27 |     )
28 |     args_schema: Type[BaseModel] = PDFSearchToolSchema
29 | 
30 |     def __init__(self, pdf: Optional[str] = None, **kwargs):
31 |         super().__init__(**kwargs)
32 |         if pdf is not None:
33 |             self.add(pdf)
34 |             self.description = f"A tool that can be used to semantic search a query the {pdf} PDF's content."
35 |             self.args_schema = FixedPDFSearchToolSchema
36 |             self._generate_description()
37 | 
38 |     def add(
39 |         self,
40 |         *args: Any,
41 |         **kwargs: Any,
42 |     ) -> None:
43 |         kwargs["data_type"] = DataType.PDF_FILE
44 |         super().add(*args, **kwargs)
45 | 
46 |     def _before_run(
47 |         self,
48 |         query: str,
49 |         **kwargs: Any,
50 |     ) -> Any:
51 |         if "pdf" in kwargs:
52 |             self.add(kwargs["pdf"])
53 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/pdf_text_writing_tool/pdf_text_writing_tool.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Optional, Type
 2 | from pydantic import BaseModel, Field
 3 | from pypdf import PdfReader, PdfWriter, PageObject, ContentStream, NameObject, Font
 4 | from pathlib import Path
 5 | 
 6 | 
 7 | class PDFTextWritingToolSchema(BaseModel):
 8 |     """Input schema for PDFTextWritingTool."""
 9 |     pdf_path: str = Field(..., description="Path to the PDF file to modify")
10 |     text: str = Field(..., description="Text to add to the PDF")
11 |     position: tuple = Field(..., description="Tuple of (x, y) coordinates for text placement")
12 |     font_size: int = Field(default=12, description="Font size of the text")
13 |     font_color: str = Field(default="0 0 0 rg", description="RGB color code for the text")
14 |     font_name: Optional[str] = Field(default="F1", description="Font name for standard fonts")
15 |     font_file: Optional[str] = Field(None, description="Path to a .ttf font file for custom font usage")
16 |     page_number: int = Field(default=0, description="Page number to add text to")
17 | 
18 | 
19 | class PDFTextWritingTool(RagTool):
20 |     """A tool to add text to specific positions in a PDF, with custom font support."""
21 |     name: str = "PDF Text Writing Tool"
22 |     description: str = "A tool that can write text to a specific position in a PDF document, with optional custom font embedding."
23 |     args_schema: Type[BaseModel] = PDFTextWritingToolSchema
24 | 
25 |     def run(self, pdf_path: str, text: str, position: tuple, font_size: int, font_color: str,
26 |             font_name: str = "F1", font_file: Optional[str] = None, page_number: int = 0, **kwargs) -> str:
27 |         reader = PdfReader(pdf_path)
28 |         writer = PdfWriter()
29 | 
30 |         if page_number >= len(reader.pages):
31 |             return "Page number out of range."
32 | 
33 |         page: PageObject = reader.pages[page_number]
34 |         content = ContentStream(page["/Contents"].data, reader)
35 | 
36 |         if font_file:
37 |             # Check if the font file exists
38 |             if not Path(font_file).exists():
39 |                 return "Font file does not exist."
40 | 
41 |             # Embed the custom font
42 |             font_name = self.embed_font(writer, font_file)
43 | 
44 |         # Prepare text operation with the custom or standard font
45 |         x_position, y_position = position
46 |         text_operation = f"BT /{font_name} {font_size} Tf {x_position} {y_position} Td ({text}) Tj ET"
47 |         content.operations.append([font_color])  # Set color
48 |         content.operations.append([text_operation])  # Add text
49 | 
50 |         # Replace old content with new content
51 |         page[NameObject("/Contents")] = content
52 |         writer.add_page(page)
53 | 
54 |         # Save the new PDF
55 |         output_pdf_path = "modified_output.pdf"
56 |         with open(output_pdf_path, "wb") as out_file:
57 |             writer.write(out_file)
58 | 
59 |         return f"Text added to {output_pdf_path} successfully."
60 | 
61 |     def embed_font(self, writer: PdfWriter, font_file: str) -> str:
62 |         """Embeds a TTF font into the PDF and returns the font name."""
63 |         with open(font_file, "rb") as file:
64 |             font = Font.true_type(file.read())
65 |         font_ref = writer.add_object(font)
66 |         return font_ref


--------------------------------------------------------------------------------
/praisonai_tools/tools/pg_seach_tool/README.md:
--------------------------------------------------------------------------------
 1 | # PGSearchTool
 2 | 
 3 | ## Description
 4 | This tool is designed to facilitate semantic searches within PostgreSQL database tables. Leveraging the RAG (Retrieve and Generate) technology, the PGSearchTool provides users with an efficient means of querying database table content, specifically tailored for PostgreSQL databases. It simplifies the process of finding relevant data through semantic search queries, making it an invaluable resource for users needing to perform advanced queries on extensive datasets within a PostgreSQL database.
 5 | 
 6 | ## Installation
 7 | To install the `praisonai_tools` package and utilize the PGSearchTool, execute the following command in your terminal:
 8 | 
 9 | ```shell
10 | pip install 'praisonai[tools]'
11 | ```
12 | 
13 | ## Example
14 | Below is an example showcasing how to use the PGSearchTool to conduct a semantic search on a table within a PostgreSQL database:
15 | 
16 | ```python
17 | from praisonai_tools import PGSearchTool
18 | 
19 | # Initialize the tool with the database URI and the target table name
20 | tool = PGSearchTool(db_uri='postgresql://user:password@localhost:5432/mydatabase', table_name='employees')
21 | 
22 | ```
23 | 
24 | ## Arguments
25 | The PGSearchTool requires the following arguments for its operation:
26 | 
27 | - `db_uri`: A string representing the URI of the PostgreSQL database to be queried. This argument is mandatory and must include the necessary authentication details and the location of the database.
28 | - `table_name`: A string specifying the name of the table within the database on which the semantic search will be performed. This argument is mandatory.
29 | 
30 | ## Custom model and embeddings
31 | 
32 | By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
33 | 
34 | ```python
35 | tool = PGSearchTool(
36 |     config=dict(
37 |         llm=dict(
38 |             provider="ollama", # or google, openai, anthropic, llama2, ...
39 |             config=dict(
40 |                 model="llama2",
41 |                 # temperature=0.5,
42 |                 # top_p=1,
43 |                 # stream=true,
44 |             ),
45 |         ),
46 |         embedder=dict(
47 |             provider="google",
48 |             config=dict(
49 |                 model="models/embedding-001",
50 |                 task_type="retrieval_document",
51 |                 # title="Embeddings",
52 |             ),
53 |         ),
54 |     )
55 | )
56 | ```
57 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/pg_seach_tool/pg_search_tool.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Type
 2 | 
 3 | from embedchain.loaders.postgres import PostgresLoader
 4 | from pydantic.v1 import BaseModel, Field
 5 | 
 6 | from ..rag.rag_tool import RagTool
 7 | 
 8 | 
 9 | class PGSearchToolSchema(BaseModel):
10 |     """Input for PGSearchTool."""
11 | 
12 |     search_query: str = Field(
13 |         ...,
14 |         description="Mandatory semantic search query you want to use to search the database's content",
15 |     )
16 | 
17 | 
18 | class PGSearchTool(RagTool):
19 |     name: str = "Search a database's table content"
20 |     description: str = "A tool that can be used to semantic search a query from a database table's content."
21 |     args_schema: Type[BaseModel] = PGSearchToolSchema
22 |     db_uri: str = Field(..., description="Mandatory database URI")
23 | 
24 |     def __init__(self, table_name: str, **kwargs):
25 |         super().__init__(**kwargs)
26 |         self.add(table_name)
27 |         self.description = f"A tool that can be used to semantic search a query the {table_name} database table's content."
28 |         self._generate_description()
29 | 
30 |     def add(
31 |         self,
32 |         table_name: str,
33 |         **kwargs: Any,
34 |     ) -> None:
35 |         kwargs["data_type"] = "postgres"
36 |         kwargs["loader"] = PostgresLoader(config=dict(url=self.db_uri))
37 |         super().add(f"SELECT * FROM {table_name};", **kwargs)
38 | 
39 |     def _run(
40 |         self,
41 |         search_query: str,
42 |         **kwargs: Any,
43 |     ) -> Any:
44 |         return super()._run(query=search_query)
45 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/rag/README.md:
--------------------------------------------------------------------------------
 1 | # RagTool: A Dynamic Knowledge Base Tool
 2 | 
 3 | RagTool is designed to answer questions by leveraging the power of RAG by leveraging (EmbedChain). It integrates seamlessly with the praisonai ecosystem, offering a versatile and powerful solution for information retrieval.
 4 | 
 5 | ## **Overview**
 6 | 
 7 | RagTool enables users to dynamically query a knowledge base, making it an ideal tool for applications requiring access to a vast array of information. Its flexible design allows for integration with various data sources, including files, directories, web pages, yoututbe videos and custom configurations.
 8 | 
 9 | ## **Usage**
10 | 
11 | RagTool can be instantiated with data from different sources, including:
12 | 
13 | - 📰 PDF file
14 | - 📊 CSV file
15 | - 📃 JSON file
16 | - 📝 Text
17 | - 📁 Directory/ Folder
18 | - 🌐 HTML Web page
19 | - 📽️ Youtube Channel
20 | - 📺 Youtube Video
21 | - 📚 Docs website
22 | - 📝 MDX file
23 | - 📄 DOCX file
24 | - 🧾 XML file
25 | - 📬 Gmail
26 | - 📝 Github
27 | - 🐘 Postgres
28 | - 🐬 MySQL
29 | - 🤖 Slack
30 | - 💬 Discord
31 | - 🗨️ Discourse
32 | - 📝 Substack
33 | - 🐝 Beehiiv
34 | - 💾 Dropbox
35 | - 🖼️ Image
36 | - ⚙️ Custom
37 | 
38 | #### **Creating an Instance**
39 | 
40 | ```python
41 | from praisonai_tools.tools.rag_tool import RagTool
42 | 
43 | # Example: Loading from a file
44 | rag_tool = RagTool().from_file('path/to/your/file.txt')
45 | 
46 | # Example: Loading from a directory
47 | rag_tool = RagTool().from_directory('path/to/your/directory')
48 | 
49 | # Example: Loading from a web page
50 | rag_tool = RagTool().from_web_page('https://example.com')
51 | ```
52 | 
53 | ## **Contribution**
54 | 
55 | Contributions to RagTool and the broader praisonai tools ecosystem are welcome. To contribute, please follow the standard GitHub workflow for forking the repository, making changes, and submitting a pull request.
56 | 
57 | ## **License**
58 | 
59 | RagTool is open-source and available under the MIT license.
60 | 
61 | Thank you for considering RagTool for your knowledge base needs. Your contributions and feedback are invaluable to making RagTool even better.
62 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/rag/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MervinPraison/PraisonAI-Tools/2049b39d645f3e1ee9160e5a4c74f847996a9bfb/praisonai_tools/tools/rag/__init__.py


--------------------------------------------------------------------------------
/praisonai_tools/tools/rag/rag_tool.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | from typing import Any
 3 | 
 4 | from pydantic import BaseModel, Field, model_validator
 5 | 
 6 | from praisonai_tools.tools.base_tool import BaseTool
 7 | 
 8 | 
 9 | class Adapter(BaseModel, ABC):
10 |     class Config:
11 |         arbitrary_types_allowed = True
12 | 
13 |     @abstractmethod
14 |     def query(self, question: str) -> str:
15 |         """Query the knowledge base with a question and return the answer."""
16 | 
17 |     @abstractmethod
18 |     def add(
19 |         self,
20 |         *args: Any,
21 |         **kwargs: Any,
22 |     ) -> None:
23 |         """Add content to the knowledge base."""
24 | 
25 | 
26 | class RagTool(BaseTool):
27 |     class _AdapterPlaceholder(Adapter):
28 |         def query(self, question: str) -> str:
29 |             raise NotImplementedError
30 | 
31 |         def add(self, *args: Any, **kwargs: Any) -> None:
32 |             raise NotImplementedError
33 | 
34 |     name: str = "Knowledge base"
35 |     description: str = "A knowledge base that can be used to answer questions."
36 |     summarize: bool = False
37 |     adapter: Adapter = Field(default_factory=_AdapterPlaceholder)
38 |     config: dict[str, Any] | None = None
39 | 
40 |     @model_validator(mode="after")
41 |     def _set_default_adapter(self):
42 |         if isinstance(self.adapter, RagTool._AdapterPlaceholder):
43 |             from embedchain import App
44 | 
45 |             from praisonai_tools.adapters.embedchain_adapter import EmbedchainAdapter
46 | 
47 |             app = App.from_config(config=self.config) if self.config else App()
48 |             self.adapter = EmbedchainAdapter(
49 |                 embedchain_app=app, summarize=self.summarize
50 |             )
51 | 
52 |         return self
53 | 
54 |     def add(
55 |         self,
56 |         *args: Any,
57 |         **kwargs: Any,
58 |     ) -> None:
59 |         self.adapter.add(*args, **kwargs)
60 | 
61 |     def _run(
62 |         self,
63 |         query: str,
64 |         **kwargs: Any,
65 |     ) -> Any:
66 |         self._before_run(query, **kwargs)
67 | 
68 |         return f"Relevant Content:\n{self.adapter.query(query)}"
69 | 
70 |     def _before_run(self, query, **kwargs):
71 |         pass
72 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/scrape_element_from_website/scrape_element_from_website.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import requests
 3 | from bs4 import BeautifulSoup
 4 | from typing import Optional, Type, Any
 5 | from pydantic.v1 import BaseModel, Field
 6 | from ..base_tool import BaseTool
 7 | 
 8 | class FixedScrapeElementFromWebsiteToolSchema(BaseModel):
 9 | 	"""Input for ScrapeElementFromWebsiteTool."""
10 | 	pass
11 | 
12 | class ScrapeElementFromWebsiteToolSchema(FixedScrapeElementFromWebsiteToolSchema):
13 | 	"""Input for ScrapeElementFromWebsiteTool."""
14 | 	website_url: str = Field(..., description="Mandatory website url to read the file")
15 | 	css_element: str = Field(..., description="Mandatory css reference for element to scrape from the website")
16 | 
17 | class ScrapeElementFromWebsiteTool(BaseTool):
18 | 	name: str = "Read a website content"
19 | 	description: str = "A tool that can be used to read a website content."
20 | 	args_schema: Type[BaseModel] = ScrapeElementFromWebsiteToolSchema
21 | 	website_url: Optional[str] = None
22 | 	cookies: Optional[dict] = None
23 | 	css_element: Optional[str] = None
24 | 	headers: Optional[dict] = {
25 | 		'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
26 | 		'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
27 | 		'Accept-Language': 'en-US,en;q=0.9',
28 | 		'Referer': 'https://www.google.com/',
29 | 		'Connection': 'keep-alive',
30 | 		'Upgrade-Insecure-Requests': '1',
31 | 		'Accept-Encoding': 'gzip, deflate, br'
32 | 	}
33 | 
34 | 	def __init__(self, website_url: Optional[str] = None, cookies: Optional[dict] = None, css_element: Optional[str] = None, **kwargs):
35 | 		super().__init__(**kwargs)
36 | 		if website_url is not None:
37 | 			self.website_url = website_url
38 | 			self.css_element = css_element
39 | 			self.description = f"A tool that can be used to read {website_url}'s content."
40 | 			self.args_schema = FixedScrapeElementFromWebsiteToolSchema
41 | 			self._generate_description()
42 | 			if cookies is not None:
43 | 				self.cookies = {cookies["name"]: os.getenv(cookies["value"])}
44 | 
45 | 	def _run(
46 | 		self,
47 | 		**kwargs: Any,
48 | 	) -> Any:
49 | 		website_url = kwargs.get('website_url', self.website_url)
50 | 		css_element = kwargs.get('css_element', self.css_element)
51 | 		page = requests.get(website_url, headers=self.headers, cookies=self.cookies if self.cookies else {})
52 | 		parsed = BeautifulSoup(page.content, "html.parser")
53 | 		elements = parsed.select(css_element)
54 | 		return "\n".join([element.get_text() for element in elements])
55 | 
56 | 
57 | 
58 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/scrape_website_tool/README.md:
--------------------------------------------------------------------------------
 1 | # ScrapeWebsiteTool
 2 | 
 3 | ## Description
 4 | A tool designed to extract and read the content of a specified website. It is capable of handling various types of web pages by making HTTP requests and parsing the received HTML content. This tool can be particularly useful for web scraping tasks, data collection, or extracting specific information from websites.
 5 | 
 6 | ## Installation
 7 | Install the praisonai_tools package
 8 | ```shell
 9 | pip install 'praisonai[tools]'
10 | ```
11 | 
12 | ## Example
13 | ```python
14 | from praisonai_tools import ScrapeWebsiteTool
15 | 
16 | # To enable scrapping any website it finds during it's execution
17 | tool = ScrapeWebsiteTool()
18 | 
19 | # Initialize the tool with the website URL, so the agent can only scrap the content of the specified website
20 | tool = ScrapeWebsiteTool(website_url='https://www.example.com')
21 | ```
22 | 
23 | ## Arguments
24 | - `website_url` : Mandatory website URL to read the file. This is the primary input for the tool, specifying which website's content should be scraped and read.


--------------------------------------------------------------------------------
/praisonai_tools/tools/scrape_website_tool/scrape_website_tool.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import requests
 3 | from bs4 import BeautifulSoup
 4 | from typing import Optional, Type, Any
 5 | from pydantic.v1 import BaseModel, Field
 6 | from ..base_tool import BaseTool
 7 | 
 8 | class FixedScrapeWebsiteToolSchema(BaseModel):
 9 | 	"""Input for ScrapeWebsiteTool."""
10 | 	pass
11 | 
12 | class ScrapeWebsiteToolSchema(FixedScrapeWebsiteToolSchema):
13 | 	"""Input for ScrapeWebsiteTool."""
14 | 	website_url: str = Field(..., description="Mandatory website url to read the file")
15 | 
16 | class ScrapeWebsiteTool(BaseTool):
17 | 	name: str = "Read website content"
18 | 	description: str = "A tool that can be used to read a website content."
19 | 	args_schema: Type[BaseModel] = ScrapeWebsiteToolSchema
20 | 	website_url: Optional[str] = None
21 | 	cookies: Optional[dict] = None
22 | 	headers: Optional[dict] = {
23 | 		'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
24 | 		'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
25 | 		'Accept-Language': 'en-US,en;q=0.9',
26 | 		'Referer': 'https://www.google.com/',
27 | 		'Connection': 'keep-alive',
28 | 		'Upgrade-Insecure-Requests': '1',
29 | 		'Accept-Encoding': 'gzip, deflate, br'
30 | 	}
31 | 
32 | 	def __init__(self, website_url: Optional[str] = None, cookies: Optional[dict] = None, **kwargs):
33 | 		super().__init__(**kwargs)
34 | 		if website_url is not None:
35 | 			self.website_url = website_url
36 | 			self.description = f"A tool that can be used to read {website_url}'s content."
37 | 			self.args_schema = FixedScrapeWebsiteToolSchema
38 | 			self._generate_description()
39 | 			if cookies is not None:
40 | 				self.cookies = {cookies["name"]: os.getenv(cookies["value"])}
41 | 
42 | 	def _run(
43 | 		self,
44 | 		**kwargs: Any,
45 | 	) -> Any:
46 | 		website_url = kwargs.get('website_url', self.website_url)
47 | 		page = requests.get(
48 | 			website_url,
49 | 			timeout=15,
50 | 			headers=self.headers,
51 | 			cookies=self.cookies if self.cookies else {}
52 | 		)
53 | 		parsed = BeautifulSoup(page.content, "html.parser")
54 | 		text = parsed.get_text()
55 | 		text = '\n'.join([i for i in text.split('\n') if i.strip() != ''])
56 | 		text = ' '.join([i for i in text.split(' ') if i.strip() != ''])
57 | 		return text
58 | 
59 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/selenium_scraping_tool/README.md:
--------------------------------------------------------------------------------
 1 | # SeleniumScrapingTool
 2 | 
 3 | ## Description
 4 | This tool is designed for efficient web scraping, enabling users to extract content from web pages. It supports targeted scraping by allowing the specification of a CSS selector for desired elements. The flexibility of the tool enables it to be used on any website URL provided by the user, making it a versatile tool for various web scraping needs.
 5 | 
 6 | ## Installation
 7 | Install the praisonai_tools package
 8 | ```
 9 | pip install 'praisonai[tools]'
10 | ```
11 | 
12 | ## Example
13 | ```python
14 | from praisonai_tools import SeleniumScrapingTool
15 | 
16 | # Example 1: Scrape any website it finds during its execution
17 | tool = SeleniumScrapingTool()
18 | 
19 | # Example 2: Scrape the entire webpage
20 | tool = SeleniumScrapingTool(website_url='https://example.com')
21 | 
22 | # Example 3: Scrape a specific CSS element from the webpage
23 | tool = SeleniumScrapingTool(website_url='https://example.com', css_element='.main-content')
24 | 
25 | # Example 4: Scrape using optional parameters for customized scraping
26 | tool = SeleniumScrapingTool(website_url='https://example.com', css_element='.main-content', cookie={'name': 'user', 'value': 'John Doe'})
27 | ```
28 | 
29 | ## Arguments
30 | - `website_url`: Mandatory. The URL of the website to scrape.
31 | - `css_element`: Mandatory. The CSS selector for a specific element to scrape from the website.
32 | - `cookie`: Optional. A dictionary containing cookie information. This parameter allows the tool to simulate a session with cookie information, providing access to content that may be restricted to logged-in users.
33 | - `wait_time`: Optional. The number of seconds the tool waits after loading the website and after setting a cookie, before scraping the content. This allows for dynamic content to load properly.
34 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/selenium_scraping_tool/selenium_scraping_tool.py:
--------------------------------------------------------------------------------
 1 | from typing import Optional, Type, Any
 2 | import time
 3 | from pydantic.v1 import BaseModel, Field
 4 | 
 5 | from bs4 import BeautifulSoup
 6 | from selenium import webdriver
 7 | from selenium.webdriver.common.by import By
 8 | from selenium.webdriver.firefox.options import Options
 9 | 
10 | from ..base_tool import BaseTool
11 | 
12 | class FixedSeleniumScrapingToolSchema(BaseModel):
13 | 	"""Input for SeleniumScrapingTool."""
14 | 	pass
15 | 
16 | class SeleniumScrapingToolSchema(FixedSeleniumScrapingToolSchema):
17 | 	"""Input for SeleniumScrapingTool."""
18 | 	website_url: str = Field(..., description="Mandatory website url to read the file")
19 | 	css_element: str = Field(..., description="Mandatory css reference for element to scrape from the website")
20 | 
21 | class SeleniumScrapingTool(BaseTool):
22 | 	name: str = "Read a website content"
23 | 	description: str = "A tool that can be used to read a website content."
24 | 	args_schema: Type[BaseModel] = SeleniumScrapingToolSchema
25 | 	website_url: Optional[str] = None
26 | 	driver: Optional[Any] = webdriver.Chrome
27 | 	cookie: Optional[dict] = None
28 | 	wait_time: Optional[int] = 3
29 | 	css_element: Optional[str] = None
30 | 
31 | 	def __init__(self, website_url: Optional[str] = None, cookie: Optional[dict] = None, css_element: Optional[str] = None, **kwargs):
32 | 		super().__init__(**kwargs)
33 | 		if cookie is not None:
34 | 				self.cookie = cookie
35 | 
36 | 		if css_element is not None:
37 | 			self.css_element = css_element
38 | 
39 | 		if website_url is not None:
40 | 			self.website_url = website_url
41 | 			self.description = f"A tool that can be used to read {website_url}'s content."
42 | 			self.args_schema = FixedSeleniumScrapingToolSchema
43 | 
44 | 		self._generate_description()
45 | 	def _run(
46 | 		self,
47 | 		**kwargs: Any,
48 | 	) -> Any:
49 | 		website_url = kwargs.get('website_url', self.website_url)
50 | 		css_element = kwargs.get('css_element', self.css_element)
51 | 		driver = self._create_driver(website_url, self.cookie, self.wait_time)
52 | 
53 | 		content = []
54 | 		if css_element is None or css_element.strip() == "":
55 | 			body_text = driver.find_element(By.TAG_NAME, "body").text
56 | 			content.append(body_text)
57 | 		else:
58 | 			for element in driver.find_elements(By.CSS_SELECTOR, css_element):
59 | 				content.append(element.text)
60 | 		driver.close()
61 | 		return "\n".join(content)
62 | 
63 | 	def _create_driver(self, url, cookie, wait_time):
64 | 			options = Options()
65 | 			options.add_argument("--headless")
66 | 			driver = self.driver(options=options)
67 | 			driver.get(url)
68 | 			time.sleep(wait_time)
69 | 			if cookie:
70 | 				driver.add_cookie(cookie)
71 | 				time.sleep(wait_time)
72 | 				driver.get(url)
73 | 				time.sleep(wait_time)
74 | 			return driver
75 | 
76 | 	def close(self):
77 | 		self.driver.close()


--------------------------------------------------------------------------------
/praisonai_tools/tools/serper_dev_tool/README.md:
--------------------------------------------------------------------------------
 1 | # SerperDevTool Documentation
 2 | 
 3 | ## Description
 4 | This tool is designed to perform a semantic search for a specified query from a text's content across the internet. It utilizes the `serper.dev` API to fetch and display the most relevant search results based on the query provided by the user.
 5 | 
 6 | ## Installation
 7 | To incorporate this tool into your project, follow the installation instructions below:
 8 | ```shell
 9 | pip install 'praisonai[tools]'
10 | ```
11 | 
12 | ## Example
13 | The following example demonstrates how to initialize the tool and execute a search with a given query:
14 | 
15 | ```python
16 | from praisonai_tools import SerperDevTool
17 | 
18 | # Initialize the tool for internet searching capabilities
19 | tool = SerperDevTool()
20 | ```
21 | 
22 | ## Steps to Get Started
23 | To effectively use the `SerperDevTool`, follow these steps:
24 | 
25 | 1. **Package Installation**: Confirm that the `praisonai[tools]` package is installed in your Python environment.
26 | 2. **API Key Acquisition**: Acquire a `serper.dev` API key by registering for a free account at `serper.dev`.
27 | 3. **Environment Configuration**: Store your obtained API key in an environment variable named `SERPER_API_KEY` to facilitate its use by the tool.
28 | 
29 | ## Conclusion
30 | By integrating the `SerperDevTool` into Python projects, users gain the ability to conduct real-time, relevant searches across the internet directly from their applications. By adhering to the setup and usage guidelines provided, incorporating this tool into projects is streamlined and straightforward.
31 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/serper_dev_tool/serper_dev_tool.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import json
 3 | import requests
 4 | 
 5 | from typing import Type, Any
 6 | from pydantic.v1 import BaseModel, Field
 7 | from praisonai_tools.tools.base_tool import BaseTool
 8 | 
 9 | class SerperDevToolSchema(BaseModel):
10 | 	"""Input for SerperDevTool."""
11 | 	search_query: str = Field(..., description="Mandatory search query you want to use to search the internet")
12 | 
13 | class SerperDevTool(BaseTool):
14 | 	name: str = "Search the internet"
15 | 	description: str = "A tool that can be used to search the internet with a search_query."
16 | 	args_schema: Type[BaseModel] = SerperDevToolSchema
17 | 	search_url: str = "https://google.serper.dev/search"
18 | 	n_results: int = 10
19 | 
20 | 	def _run(
21 | 		self,
22 | 		**kwargs: Any,
23 | 	) -> Any:
24 | 		search_query = kwargs.get('search_query')
25 | 		if search_query is None:
26 | 			search_query = kwargs.get('query')
27 | 
28 | 		payload = json.dumps({"q": search_query})
29 | 		headers = {
30 | 				'X-API-KEY': os.environ['SERPER_API_KEY'],
31 | 				'content-type': 'application/json'
32 | 		}
33 | 		response = requests.request("POST", self.search_url, headers=headers, data=payload)
34 | 		results = response.json()
35 | 		if 'organic' in results:
36 | 			results = results['organic']
37 | 			string = []
38 | 			for result in results:
39 | 				try:
40 | 					string.append('\n'.join([
41 | 							f"Title: {result['title']}",
42 | 							f"Link: {result['link']}",
43 | 							f"Snippet: {result['snippet']}",
44 | 							"---"
45 | 					]))
46 | 				except KeyError:
47 | 					next
48 | 
49 | 			content = '\n'.join(string)
50 | 			return f"\nSearch results: {content}\n"
51 | 		else:
52 | 			return results
53 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/txt_search_tool/README.md:
--------------------------------------------------------------------------------
 1 | # TXTSearchTool
 2 | 
 3 | ## Description
 4 | This tool is used to perform a RAG (Retrieval-Augmented Generation) search within the content of a text file. It allows for semantic searching of a query within a specified text file's content, making it an invaluable resource for quickly extracting information or finding specific sections of text based on the query provided.
 5 | 
 6 | ## Installation
 7 | To use the TXTSearchTool, you first need to install the praisonai_tools package. This can be done using pip, a package manager for Python. Open your terminal or command prompt and enter the following command:
 8 | 
 9 | ```shell
10 | pip install 'praisonai[tools]'
11 | ```
12 | 
13 | This command will download and install the TXTSearchTool along with any necessary dependencies.
14 | 
15 | ## Example
16 | The following example demonstrates how to use the TXTSearchTool to search within a text file. This example shows both the initialization of the tool with a specific text file and the subsequent search within that file's content.
17 | 
18 | ```python
19 | from praisonai_tools import TXTSearchTool
20 | 
21 | # Initialize the tool to search within any text file's content the agent learns about during its execution
22 | tool = TXTSearchTool()
23 | 
24 | # OR
25 | 
26 | # Initialize the tool with a specific text file, so the agent can search within the given text file's content
27 | tool = TXTSearchTool(txt='path/to/text/file.txt')
28 | ```
29 | 
30 | ## Arguments
31 | - `txt` (str): **Optinal**. The path to the text file you want to search. This argument is only required if the tool was not initialized with a specific text file; otherwise, the search will be conducted within the initially provided text file.
32 | 
33 | ## Custom model and embeddings
34 | 
35 | By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
36 | 
37 | ```python
38 | tool = TXTSearchTool(
39 |     config=dict(
40 |         llm=dict(
41 |             provider="ollama", # or google, openai, anthropic, llama2, ...
42 |             config=dict(
43 |                 model="llama2",
44 |                 # temperature=0.5,
45 |                 # top_p=1,
46 |                 # stream=true,
47 |             ),
48 |         ),
49 |         embedder=dict(
50 |             provider="google",
51 |             config=dict(
52 |                 model="models/embedding-001",
53 |                 task_type="retrieval_document",
54 |                 # title="Embeddings",
55 |             ),
56 |         ),
57 |     )
58 | )
59 | ```
60 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/txt_search_tool/txt_search_tool.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Optional, Type
 2 | 
 3 | from embedchain.models.data_type import DataType
 4 | from pydantic.v1 import BaseModel, Field
 5 | 
 6 | from ..rag.rag_tool import RagTool
 7 | 
 8 | 
 9 | class FixedTXTSearchToolSchema(BaseModel):
10 |     """Input for TXTSearchTool."""
11 | 
12 |     search_query: str = Field(
13 |         ...,
14 |         description="Mandatory search query you want to use to search the txt's content",
15 |     )
16 | 
17 | 
18 | class TXTSearchToolSchema(FixedTXTSearchToolSchema):
19 |     """Input for TXTSearchTool."""
20 | 
21 |     txt: str = Field(..., description="Mandatory txt path you want to search")
22 | 
23 | 
24 | class TXTSearchTool(RagTool):
25 |     name: str = "Search a txt's content"
26 |     description: str = (
27 |         "A tool that can be used to semantic search a query from a txt's content."
28 |     )
29 |     args_schema: Type[BaseModel] = TXTSearchToolSchema
30 | 
31 |     def __init__(self, txt: Optional[str] = None, **kwargs):
32 |         super().__init__(**kwargs)
33 |         if txt is not None:
34 |             self.add(txt)
35 |             self.description = f"A tool that can be used to semantic search a query the {txt} txt's content."
36 |             self.args_schema = FixedTXTSearchToolSchema
37 |             self._generate_description()
38 | 
39 |     def add(
40 |         self,
41 |         *args: Any,
42 |         **kwargs: Any,
43 |     ) -> None:
44 |         kwargs["data_type"] = DataType.TEXT_FILE
45 |         super().add(*args, **kwargs)
46 | 
47 |     def _before_run(
48 |         self,
49 |         query: str,
50 |         **kwargs: Any,
51 |     ) -> Any:
52 |         if "txt" in kwargs:
53 |             self.add(kwargs["txt"])
54 | 
55 |     def _run(
56 |         self,
57 |         search_query: str,
58 |         **kwargs: Any,
59 |     ) -> Any:
60 |         return super()._run(query=search_query)
61 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/website_search/README.md:
--------------------------------------------------------------------------------
 1 | # WebsiteSearchTool
 2 | 
 3 | ## Description
 4 | This tool is specifically crafted for conducting semantic searches within the content of a particular website. Leveraging a Retrieval-Augmented Generation (RAG) model, it navigates through the information provided on a given URL. Users have the flexibility to either initiate a search across any website known or discovered during its usage or to concentrate the search on a predefined, specific website.
 5 | 
 6 | ## Installation
 7 | Install the praisonai_tools package by executing the following command in your terminal:
 8 | 
 9 | ```shell
10 | pip install 'praisonai[tools]'
11 | ```
12 | 
13 | ## Example
14 | To utilize the WebsiteSearchTool for different use cases, follow these examples:
15 | 
16 | ```python
17 | from praisonai_tools import WebsiteSearchTool
18 | 
19 | # To enable the tool to search any website the agent comes across or learns about during its operation
20 | tool = WebsiteSearchTool()
21 | 
22 | # OR
23 | 
24 | # To restrict the tool to only search within the content of a specific website.
25 | tool = WebsiteSearchTool(website='https://example.com')
26 | ```
27 | 
28 | ## Arguments
29 | - `website` : An optional argument that specifies the valid website URL to perform the search on. This becomes necessary if the tool is initialized without a specific website. In the `WebsiteSearchToolSchema`, this argument is mandatory. However, in the `FixedWebsiteSearchToolSchema`, it becomes optional if a website is provided during the tool's initialization, as it will then only search within the predefined website's content.
30 | 
31 | ## Custom model and embeddings
32 | 
33 | By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
34 | 
35 | ```python
36 | tool = WebsiteSearchTool(
37 |     config=dict(
38 |         llm=dict(
39 |             provider="ollama", # or google, openai, anthropic, llama2, ...
40 |             config=dict(
41 |                 model="llama2",
42 |                 # temperature=0.5,
43 |                 # top_p=1,
44 |                 # stream=true,
45 |             ),
46 |         ),
47 |         embedder=dict(
48 |             provider="google",
49 |             config=dict(
50 |                 model="models/embedding-001",
51 |                 task_type="retrieval_document",
52 |                 # title="Embeddings",
53 |             ),
54 |         ),
55 |     )
56 | )
57 | ```
58 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/website_search/website_search_tool.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Optional, Type
 2 | 
 3 | from embedchain.models.data_type import DataType
 4 | from pydantic.v1 import BaseModel, Field
 5 | 
 6 | from ..rag.rag_tool import RagTool
 7 | 
 8 | 
 9 | class FixedWebsiteSearchToolSchema(BaseModel):
10 |     """Input for WebsiteSearchTool."""
11 | 
12 |     search_query: str = Field(
13 |         ...,
14 |         description="Mandatory search query you want to use to search a specific website",
15 |     )
16 | 
17 | 
18 | class WebsiteSearchToolSchema(FixedWebsiteSearchToolSchema):
19 |     """Input for WebsiteSearchTool."""
20 | 
21 |     website: str = Field(
22 |         ..., description="Mandatory valid website URL you want to search on"
23 |     )
24 | 
25 | 
26 | class WebsiteSearchTool(RagTool):
27 |     name: str = "Search in a specific website"
28 |     description: str = "A tool that can be used to semantic search a query from a specific URL content."
29 |     args_schema: Type[BaseModel] = WebsiteSearchToolSchema
30 | 
31 |     def __init__(self, website: Optional[str] = None, **kwargs):
32 |         super().__init__(**kwargs)
33 |         if website is not None:
34 |             self.add(website)
35 |             self.description = f"A tool that can be used to semantic search a query from {website} website content."
36 |             self.args_schema = FixedWebsiteSearchToolSchema
37 |             self._generate_description()
38 | 
39 |     def add(
40 |         self,
41 |         *args: Any,
42 |         **kwargs: Any,
43 |     ) -> None:
44 |         kwargs["data_type"] = DataType.WEB_PAGE
45 |         super().add(*args, **kwargs)
46 | 
47 |     def _before_run(
48 |         self,
49 |         query: str,
50 |         **kwargs: Any,
51 |     ) -> Any:
52 |         if "website" in kwargs:
53 |             self.add(kwargs["website"])
54 | 
55 |     def _run(
56 |         self,
57 |         search_query: str,
58 |         **kwargs: Any,
59 |     ) -> Any:
60 |         return super()._run(query=search_query)
61 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/xml_search_tool/README.md:
--------------------------------------------------------------------------------
 1 | # XMLSearchTool
 2 | 
 3 | ## Description
 4 | The XMLSearchTool is a cutting-edge RAG tool engineered for conducting semantic searches within XML files. Ideal for users needing to parse and extract information from XML content efficiently, this tool supports inputting a search query and an optional XML file path. By specifying an XML path, users can target their search more precisely to the content of that file, thereby obtaining more relevant search outcomes.
 5 | 
 6 | ## Installation
 7 | To start using the XMLSearchTool, you must first install the praisonai_tools package. This can be easily done with the following command:
 8 | 
 9 | ```shell
10 | pip install 'praisonai[tools]'
11 | ```
12 | 
13 | ## Example
14 | Here are two examples demonstrating how to use the XMLSearchTool. The first example shows searching within a specific XML file, while the second example illustrates initiating a search without predefining an XML path, providing flexibility in search scope.
15 | 
16 | ```python
17 | from praisonai_tools.tools.xml_search_tool import XMLSearchTool
18 | 
19 | # Allow agents to search within any XML file's content as it learns about their paths during execution
20 | tool = XMLSearchTool()
21 | 
22 | # OR
23 | 
24 | # Initialize the tool with a specific XML file path for exclusive search within that document
25 | tool = XMLSearchTool(xml='path/to/your/xmlfile.xml')
26 | ```
27 | 
28 | ## Arguments
29 | - `xml`: This is the path to the XML file you wish to search. It is an optional parameter during the tool's initialization but must be provided either at initialization or as part of the `run` method's arguments to execute a search.
30 | 
31 | ## Custom model and embeddings
32 | 
33 | By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
34 | 
35 | ```python
36 | tool = XMLSearchTool(
37 |     config=dict(
38 |         llm=dict(
39 |             provider="ollama", # or google, openai, anthropic, llama2, ...
40 |             config=dict(
41 |                 model="llama2",
42 |                 # temperature=0.5,
43 |                 # top_p=1,
44 |                 # stream=true,
45 |             ),
46 |         ),
47 |         embedder=dict(
48 |             provider="google",
49 |             config=dict(
50 |                 model="models/embedding-001",
51 |                 task_type="retrieval_document",
52 |                 # title="Embeddings",
53 |             ),
54 |         ),
55 |     )
56 | )
57 | ```
58 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/xml_search_tool/xml_search_tool.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Optional, Type
 2 | 
 3 | from embedchain.models.data_type import DataType
 4 | from pydantic.v1 import BaseModel, Field
 5 | 
 6 | from ..rag.rag_tool import RagTool
 7 | 
 8 | 
 9 | class FixedXMLSearchToolSchema(BaseModel):
10 |     """Input for XMLSearchTool."""
11 | 
12 |     search_query: str = Field(
13 |         ...,
14 |         description="Mandatory search query you want to use to search the XML's content",
15 |     )
16 | 
17 | 
18 | class XMLSearchToolSchema(FixedXMLSearchToolSchema):
19 |     """Input for XMLSearchTool."""
20 | 
21 |     xml: str = Field(..., description="Mandatory xml path you want to search")
22 | 
23 | 
24 | class XMLSearchTool(RagTool):
25 |     name: str = "Search a XML's content"
26 |     description: str = (
27 |         "A tool that can be used to semantic search a query from a XML's content."
28 |     )
29 |     args_schema: Type[BaseModel] = XMLSearchToolSchema
30 | 
31 |     def __init__(self, xml: Optional[str] = None, **kwargs):
32 |         super().__init__(**kwargs)
33 |         if xml is not None:
34 |             self.add(xml)
35 |             self.description = f"A tool that can be used to semantic search a query the {xml} XML's content."
36 |             self.args_schema = FixedXMLSearchToolSchema
37 |             self._generate_description()
38 | 
39 |     def add(
40 |         self,
41 |         *args: Any,
42 |         **kwargs: Any,
43 |     ) -> None:
44 |         kwargs["data_type"] = DataType.XML
45 |         super().add(*args, **kwargs)
46 | 
47 |     def _before_run(
48 |         self,
49 |         query: str,
50 |         **kwargs: Any,
51 |     ) -> Any:
52 |         if "xml" in kwargs:
53 |             self.add(kwargs["xml"])
54 | 
55 |     def _run(
56 |         self,
57 |         search_query: str,
58 |         **kwargs: Any,
59 |     ) -> Any:
60 |         return super()._run(query=search_query)
61 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/youtube_channel_search_tool/README.md:
--------------------------------------------------------------------------------
 1 | # YoutubeChannelSearchTool
 2 | 
 3 | ## Description
 4 | This tool is designed to perform semantic searches within a specific Youtube channel's content. Leveraging the RAG (Retrieval-Augmented Generation) methodology, it provides relevant search results, making it invaluable for extracting information or finding specific content without the need to manually sift through videos. It streamlines the search process within Youtube channels, catering to researchers, content creators, and viewers seeking specific information or topics.
 5 | 
 6 | ## Installation
 7 | To utilize the YoutubeChannelSearchTool, the `praisonai_tools` package must be installed. Execute the following command in your shell to install:
 8 | 
 9 | ```shell
10 | pip install 'praisonai[tools]'
11 | ```
12 | 
13 | ## Example
14 | To begin using the YoutubeChannelSearchTool, follow the example below. This demonstrates initializing the tool with a specific Youtube channel handle and conducting a search within that channel's content.
15 | 
16 | ```python
17 | from praisonai_tools import YoutubeChannelSearchTool
18 | 
19 | # Initialize the tool to search within any Youtube channel's content the agent learns about during its execution
20 | tool = YoutubeChannelSearchTool()
21 | 
22 | # OR
23 | 
24 | # Initialize the tool with a specific Youtube channel handle to target your search
25 | tool = YoutubeChannelSearchTool(youtube_channel_handle='@exampleChannel')
26 | ```
27 | 
28 | ## Arguments
29 | - `youtube_channel_handle` : A mandatory string representing the Youtube channel handle. This parameter is crucial for initializing the tool to specify the channel you want to search within. The tool is designed to only search within the content of the provided channel handle.
30 | 
31 | ## Custom model and embeddings
32 | 
33 | By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
34 | 
35 | ```python
36 | tool = YoutubeChannelSearchTool(
37 |     config=dict(
38 |         llm=dict(
39 |             provider="ollama", # or google, openai, anthropic, llama2, ...
40 |             config=dict(
41 |                 model="llama2",
42 |                 # temperature=0.5,
43 |                 # top_p=1,
44 |                 # stream=true,
45 |             ),
46 |         ),
47 |         embedder=dict(
48 |             provider="google",
49 |             config=dict(
50 |                 model="models/embedding-001",
51 |                 task_type="retrieval_document",
52 |                 # title="Embeddings",
53 |             ),
54 |         ),
55 |     )
56 | )
57 | ```
58 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/youtube_channel_search_tool/youtube_channel_search_tool.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Optional, Type
 2 | 
 3 | from embedchain.models.data_type import DataType
 4 | from pydantic.v1 import BaseModel, Field
 5 | 
 6 | from ..rag.rag_tool import RagTool
 7 | 
 8 | 
 9 | class FixedYoutubeChannelSearchToolSchema(BaseModel):
10 |     """Input for YoutubeChannelSearchTool."""
11 | 
12 |     search_query: str = Field(
13 |         ...,
14 |         description="Mandatory search query you want to use to search the Youtube Channels content",
15 |     )
16 | 
17 | 
18 | class YoutubeChannelSearchToolSchema(FixedYoutubeChannelSearchToolSchema):
19 |     """Input for YoutubeChannelSearchTool."""
20 | 
21 |     youtube_channel_handle: str = Field(
22 |         ..., description="Mandatory youtube_channel_handle path you want to search"
23 |     )
24 | 
25 | 
26 | class YoutubeChannelSearchTool(RagTool):
27 |     name: str = "Search a Youtube Channels content"
28 |     description: str = "A tool that can be used to semantic search a query from a Youtube Channels content."
29 |     args_schema: Type[BaseModel] = YoutubeChannelSearchToolSchema
30 | 
31 |     def __init__(self, youtube_channel_handle: Optional[str] = None, **kwargs):
32 |         super().__init__(**kwargs)
33 |         if youtube_channel_handle is not None:
34 |             self.add(youtube_channel_handle)
35 |             self.description = f"A tool that can be used to semantic search a query the {youtube_channel_handle} Youtube Channels content."
36 |             self.args_schema = FixedYoutubeChannelSearchToolSchema
37 |             self._generate_description()
38 | 
39 |     def add(
40 |         self,
41 |         youtube_channel_handle: str,
42 |         **kwargs: Any,
43 |     ) -> None:
44 |         if not youtube_channel_handle.startswith("@"):
45 |             youtube_channel_handle = f"@{youtube_channel_handle}"
46 | 
47 |         kwargs["data_type"] = DataType.YOUTUBE_CHANNEL
48 |         super().add(youtube_channel_handle, **kwargs)
49 | 
50 |     def _before_run(
51 |         self,
52 |         query: str,
53 |         **kwargs: Any,
54 |     ) -> Any:
55 |         if "youtube_channel_handle" in kwargs:
56 |             self.add(kwargs["youtube_channel_handle"])
57 | 
58 |     def _run(
59 |         self,
60 |         search_query: str,
61 |         **kwargs: Any,
62 |     ) -> Any:
63 |         return super()._run(query=search_query)
64 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/youtube_video_search_tool/README.md:
--------------------------------------------------------------------------------
 1 | # YoutubeVideoSearchTool
 2 | 
 3 | ## Description
 4 | 
 5 | This tool is part of the `praisonai_tools` package and is designed to perform semantic searches within Youtube video content, utilizing Retrieval-Augmented Generation (RAG) techniques. It is one of several "Search" tools in the package that leverage RAG for different sources. The YoutubeVideoSearchTool allows for flexibility in searches; users can search across any Youtube video content without specifying a video URL, or they can target their search to a specific Youtube video by providing its URL.
 6 | 
 7 | ## Installation
 8 | 
 9 | To utilize the YoutubeVideoSearchTool, you must first install the `praisonai_tools` package. This package contains the YoutubeVideoSearchTool among other utilities designed to enhance your data analysis and processing tasks. Install the package by executing the following command in your terminal:
10 | 
11 | ```
12 | pip install 'praisonai[tools]'
13 | ```
14 | 
15 | ## Example
16 | 
17 | To integrate the YoutubeVideoSearchTool into your Python projects, follow the example below. This demonstrates how to use the tool both for general Youtube content searches and for targeted searches within a specific video's content.
18 | 
19 | ```python
20 | from praisonai_tools import YoutubeVideoSearchTool
21 | 
22 | # General search across Youtube content without specifying a video URL, so the agent can search within any Youtube video content it learns about irs url during its operation
23 | tool = YoutubeVideoSearchTool()
24 | 
25 | # Targeted search within a specific Youtube video's content
26 | tool = YoutubeVideoSearchTool(youtube_video_url='https://youtube.com/watch?v=example')
27 | ```
28 | ## Arguments
29 | 
30 | The YoutubeVideoSearchTool accepts the following initialization arguments:
31 | 
32 | - `youtube_video_url`: An optional argument at initialization but required if targeting a specific Youtube video. It specifies the Youtube video URL path you want to search within.
33 | 
34 | ## Custom model and embeddings
35 | 
36 | By default, the tool uses OpenAI for both embeddings and summarization. To customize the model, you can use a config dictionary as follows:
37 | 
38 | ```python
39 | tool = YoutubeVideoSearchTool(
40 |     config=dict(
41 |         llm=dict(
42 |             provider="ollama", # or google, openai, anthropic, llama2, ...
43 |             config=dict(
44 |                 model="llama2",
45 |                 # temperature=0.5,
46 |                 # top_p=1,
47 |                 # stream=true,
48 |             ),
49 |         ),
50 |         embedder=dict(
51 |             provider="google",
52 |             config=dict(
53 |                 model="models/embedding-001",
54 |                 task_type="retrieval_document",
55 |                 # title="Embeddings",
56 |             ),
57 |         ),
58 |     )
59 | )
60 | ```
61 | 


--------------------------------------------------------------------------------
/praisonai_tools/tools/youtube_video_search_tool/youtube_video_search_tool.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Optional, Type
 2 | 
 3 | from embedchain.models.data_type import DataType
 4 | from pydantic.v1 import BaseModel, Field
 5 | 
 6 | from ..rag.rag_tool import RagTool
 7 | 
 8 | 
 9 | class FixedYoutubeVideoSearchToolSchema(BaseModel):
10 |     """Input for YoutubeVideoSearchTool."""
11 | 
12 |     search_query: str = Field(
13 |         ...,
14 |         description="Mandatory search query you want to use to search the Youtube Video content",
15 |     )
16 | 
17 | 
18 | class YoutubeVideoSearchToolSchema(FixedYoutubeVideoSearchToolSchema):
19 |     """Input for YoutubeVideoSearchTool."""
20 | 
21 |     youtube_video_url: str = Field(
22 |         ..., description="Mandatory youtube_video_url path you want to search"
23 |     )
24 | 
25 | 
26 | class YoutubeVideoSearchTool(RagTool):
27 |     name: str = "Search a Youtube Video content"
28 |     description: str = "A tool that can be used to semantic search a query from a Youtube Video content."
29 |     args_schema: Type[BaseModel] = YoutubeVideoSearchToolSchema
30 | 
31 |     def __init__(self, youtube_video_url: Optional[str] = None, **kwargs):
32 |         super().__init__(**kwargs)
33 |         if youtube_video_url is not None:
34 |             self.add(youtube_video_url)
35 |             self.description = f"A tool that can be used to semantic search a query the {youtube_video_url} Youtube Video content."
36 |             self.args_schema = FixedYoutubeVideoSearchToolSchema
37 |             self._generate_description()
38 | 
39 |     def add(
40 |         self,
41 |         *args: Any,
42 |         **kwargs: Any,
43 |     ) -> None:
44 |         kwargs["data_type"] = DataType.YOUTUBE_VIDEO
45 |         super().add(*args, **kwargs)
46 | 
47 |     def _before_run(
48 |         self,
49 |         query: str,
50 |         **kwargs: Any,
51 |     ) -> Any:
52 |         if "youtube_video_url" in kwargs:
53 |             self.add(kwargs["youtube_video_url"])
54 | 
55 |     def _run(
56 |         self,
57 |         search_query: str,
58 |         **kwargs: Any,
59 |     ) -> Any:
60 |         return super()._run(query=search_query)
61 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "PraisonAI-Tools"
 3 | version = "0.0.16"
 4 | description = "Set of tools for the PraisonAI framework"
 5 | authors = [
 6 |     {name = "Mervin Praison"}
 7 | ]
 8 | readme = "README.md"
 9 | requires-python = ">=3.10,<=3.13.2"
10 | dependencies = [
11 |     "pydantic>=2.6.1,<=2.10.1",
12 |     "langchain>=0.3.25,<0.4.0",
13 |     "pytest>=8.3.5,<9.0.0",
14 |     "lancedb>=0.22.0,<0.25.0",
15 |     "openai>=1.75.0,<2.0.0",
16 |     "embedchain>=0.1.128",
17 |     "chromadb>=0.5.10,<0.6.0",
18 |     "pyright>=1.1.400,<2.0.0",
19 |     "pytube>=15.0.0,<16.0.0",
20 |     "requests>=2.32.3,<3.0.0",
21 |     "beautifulsoup4>=4.13.4,<5.0.0",
22 |     "selenium>=4.32.0,<5.0.0",
23 |     "docx2txt>=0.8,<0.9",
24 |     "crewai-tools>=0.44.0,<0.50.0",
25 |     "docker>=7.1.0,<8.0.0",
26 |     "crewai>=0.118.0",
27 |     "click>=8.2.0,<9.0.0",
28 | ]
29 | 
30 | [project.urls]
31 | Homepage = "https://docs.praison.ai"
32 | Repository = "https://github.com/mervinpraison/PraisonAI-tools"
33 | 
34 | # Poetry configuration
35 | [tool.poetry]
36 | name = "PraisonAI-Tools"
37 | version = "0.0.16"
38 | description = "Set of tools for the PraisonAI framework"
39 | authors = ["Mervin Praison"]
40 | readme = "README.md"
41 | 
42 | [tool.poetry.dependencies]
43 | python = ">=3.10,<=3.13.2"
44 | pydantic = "^2.6.1"
45 | langchain = ">=0.3.25,<0.4.0"
46 | pytest = "^8.3.5"
47 | lancedb = "^0.22.0"
48 | openai = "^1.75.0"
49 | embedchain = {extras = ["github", "youtube"], version = ">=0.1.123"}
50 | chromadb = ">=0.5.10,<0.6.0"
51 | pyright = "^1.1.400"
52 | pytube = "^15.0.0"
53 | requests = "^2.32.3"
54 | beautifulsoup4 = "^4.13.4"
55 | selenium = "^4.32.0"
56 | docx2txt = "^0.8"
57 | crewai-tools = "^0.44.0"
58 | docker = "^7.1.0"
59 | crewai = "^0.118.0"
60 | click = "^8.2.0"
61 | 
62 | [tool.poetry.urls]
63 | Homepage = "https://docs.praison.ai"
64 | Repository = "https://github.com/mervinpraison/PraisonAI-tools"
65 | 
66 | [build-system]
67 | requires = ["hatchling>=1.0.0", "poetry-core>=1.0.0"]
68 | build-backend = "hatchling.build"


--------------------------------------------------------------------------------
/tests/base_tool_test.py:
--------------------------------------------------------------------------------
 1 | from typing import Callable
 2 | from praisonai_tools import BaseTool, tool
 3 | 
 4 | def test_creating_a_tool_using_annotation():
 5 | 	@tool("Name of my tool")
 6 | 	def my_tool(question: str) -> str:
 7 | 		"""Clear description for what this tool is useful for, you agent will need this information to use it."""
 8 | 		return question
 9 | 
10 | 	# Assert all the right attributes were defined
11 | 	assert my_tool.name == "Name of my tool"
12 | 	assert my_tool.description == "Name of my tool(question: 'string') - Clear description for what this tool is useful for, you agent will need this information to use it."
13 | 	assert my_tool.args_schema.schema()["properties"] == {'question': {'title': 'Question', 'type': 'string'}}
14 | 	assert my_tool.func("What is the meaning of life?") == "What is the meaning of life?"
15 | 
16 | 	# Assert the langchain tool conversion worked as expected
17 | 	converted_tool = my_tool.to_langchain()
18 | 	assert converted_tool.name == "Name of my tool"
19 | 	assert converted_tool.description == "Name of my tool(question: 'string') - Clear description for what this tool is useful for, you agent will need this information to use it."
20 | 	assert converted_tool.args_schema.schema()["properties"] == {'question': {'title': 'Question', 'type': 'string'}}
21 | 	assert converted_tool.func("What is the meaning of life?") == "What is the meaning of life?"
22 | 
23 | def test_creating_a_tool_using_baseclass():
24 | 	class MyCustomTool(BaseTool):
25 | 		name: str = "Name of my tool"
26 | 		description: str = "Clear description for what this tool is useful for, you agent will need this information to use it."
27 | 
28 | 		def _run(self, question: str) -> str:
29 | 			return question
30 | 
31 | 	my_tool = MyCustomTool()
32 | 	# Assert all the right attributes were defined
33 | 	assert my_tool.name == "Name of my tool"
34 | 	assert my_tool.description == "Name of my tool(question: 'string') - Clear description for what this tool is useful for, you agent will need this information to use it."
35 | 	assert my_tool.args_schema.schema()["properties"] == {'question': {'title': 'Question', 'type': 'string'}}
36 | 	assert my_tool.run("What is the meaning of life?") == "What is the meaning of life?"
37 | 
38 | 	# Assert the langchain tool conversion worked as expected
39 | 	converted_tool = my_tool.to_langchain()
40 | 	assert converted_tool.name == "Name of my tool"
41 | 	assert converted_tool.description == "Name of my tool(question: 'string') - Clear description for what this tool is useful for, you agent will need this information to use it."
42 | 	assert converted_tool.args_schema.schema()["properties"] == {'question': {'title': 'Question', 'type': 'string'}}
43 | 	assert converted_tool.run("What is the meaning of life?") == "What is the meaning of life?"
44 | 
45 | def test_setting_cache_function():
46 | 	class MyCustomTool(BaseTool):
47 | 		name: str = "Name of my tool"
48 | 		description: str = "Clear description for what this tool is useful for, you agent will need this information to use it."
49 | 		cache_function: Callable = lambda: False
50 | 
51 | 		def _run(self, question: str) -> str:
52 | 			return question
53 | 
54 | 	my_tool = MyCustomTool()
55 | 	# Assert all the right attributes were defined
56 | 	assert my_tool.cache_function() == False
57 | 
58 | def test_default_cache_function_is_true():
59 | 	class MyCustomTool(BaseTool):
60 | 		name: str = "Name of my tool"
61 | 		description: str = "Clear description for what this tool is useful for, you agent will need this information to use it."
62 | 
63 | 		def _run(self, question: str) -> str:
64 | 			return question
65 | 
66 | 	my_tool = MyCustomTool()
67 | 	# Assert all the right attributes were defined
68 | 	assert my_tool.cache_function() == True


--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
 1 | from typing import Callable
 2 | 
 3 | import pytest
 4 | 
 5 | 
 6 | class Helpers:
 7 |     @staticmethod
 8 |     def get_embedding_function() -> Callable:
 9 |         def _func(input):
10 |             assert input == ["What are the requirements for the task?"]
11 |             with open("tests/data/embedding.txt", "r") as file:
12 |                 content = file.read()
13 |                 numbers = content.split(",")
14 |                 return [[float(number) for number in numbers]]
15 | 
16 |         return _func
17 | 
18 | 
19 | @pytest.fixture
20 | def helpers():
21 |     return Helpers
22 | 


--------------------------------------------------------------------------------
/tests/tools/rag/rag_tool_test.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from tempfile import NamedTemporaryFile
 3 | from typing import cast
 4 | from unittest import mock
 5 | 
 6 | from pytest import fixture
 7 | 
 8 | from praisonai_tools.adapters.embedchain_adapter import EmbedchainAdapter
 9 | from praisonai_tools.tools.rag.rag_tool import RagTool
10 | 
11 | 
12 | @fixture(autouse=True)
13 | def mock_embedchain_db_uri():
14 |     with NamedTemporaryFile() as tmp:
15 |         uri = f"sqlite:///{tmp.name}"
16 |         with mock.patch.dict(os.environ, {"EMBEDCHAIN_DB_URI": uri}):
17 |             yield
18 | 
19 | 
20 | def test_custom_llm_and_embedder():
21 |     class MyTool(RagTool):
22 |         pass
23 | 
24 |     tool = MyTool(
25 |         config=dict(
26 |             llm=dict(
27 |                 provider="openai",
28 |                 config=dict(model="gpt-3.5-custom"),
29 |             ),
30 |             embedder=dict(
31 |                 provider="openai",
32 |                 config=dict(model="text-embedding-3-custom"),
33 |             ),
34 |         )
35 |     )
36 |     assert tool.adapter is not None
37 |     assert isinstance(tool.adapter, EmbedchainAdapter)
38 | 
39 |     adapter = cast(EmbedchainAdapter, tool.adapter)
40 |     assert adapter.embedchain_app.llm.config.model == "gpt-3.5-custom"
41 |     assert (
42 |         adapter.embedchain_app.embedding_model.config.model == "text-embedding-3-custom"
43 |     )
44 | 


--------------------------------------------------------------------------------