├── .env.example ├── .gitattributes ├── .gitignore ├── .gitmodules ├── .vscode └── extensions.json ├── LICENSE ├── README.md ├── app.py ├── planning └── autogen_planner.py ├── plugins ├── bing_connector.py ├── sk_bing_plugin.py └── sk_web_pages_plugin.py └── requirements.txt /.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY=keygoeshere 2 | AZURE_API_KEY=keygoeshere 3 | BING_API_KEY=keygoeshere 4 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | *.7z filter=lfs diff=lfs merge=lfs -text 2 | *.arrow filter=lfs diff=lfs merge=lfs -text 3 | *.bin filter=lfs diff=lfs merge=lfs -text 4 | *.bz2 filter=lfs diff=lfs merge=lfs -text 5 | *.ckpt filter=lfs diff=lfs merge=lfs -text 6 | *.ftz filter=lfs diff=lfs merge=lfs -text 7 | *.gz filter=lfs diff=lfs merge=lfs -text 8 | *.h5 filter=lfs diff=lfs merge=lfs -text 9 | *.joblib filter=lfs diff=lfs merge=lfs -text 10 | *.lfs.* filter=lfs diff=lfs merge=lfs -text 11 | *.mlmodel filter=lfs diff=lfs merge=lfs -text 12 | *.model filter=lfs diff=lfs merge=lfs -text 13 | *.msgpack filter=lfs diff=lfs merge=lfs -text 14 | *.npy filter=lfs diff=lfs merge=lfs -text 15 | *.npz filter=lfs diff=lfs merge=lfs -text 16 | *.onnx filter=lfs diff=lfs merge=lfs -text 17 | *.ot filter=lfs diff=lfs merge=lfs -text 18 | *.parquet filter=lfs diff=lfs merge=lfs -text 19 | *.pb filter=lfs diff=lfs merge=lfs -text 20 | *.pickle filter=lfs diff=lfs merge=lfs -text 21 | *.pkl filter=lfs diff=lfs merge=lfs -text 22 | *.pt filter=lfs diff=lfs merge=lfs -text 23 | *.pth filter=lfs diff=lfs merge=lfs -text 24 | *.rar filter=lfs diff=lfs merge=lfs -text 25 | *.safetensors filter=lfs diff=lfs merge=lfs -text 26 | saved_model/**/* filter=lfs diff=lfs merge=lfs -text 27 | *.tar.* filter=lfs diff=lfs merge=lfs -text 28 | *.tar filter=lfs diff=lfs merge=lfs -text 29 | 
*.tflite filter=lfs diff=lfs merge=lfs -text 30 | *.tgz filter=lfs diff=lfs merge=lfs -text 31 | *.wasm filter=lfs diff=lfs merge=lfs -text 32 | *.xz filter=lfs diff=lfs merge=lfs -text 33 | *.zip filter=lfs diff=lfs merge=lfs -text 34 | *.zst filter=lfs diff=lfs merge=lfs -text 35 | *tfevents* filter=lfs diff=lfs merge=lfs -text 36 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 105 | __pypackages__/ 106 | 107 | # Celery stuff 108 | celerybeat-schedule 109 | celerybeat.pid 110 | 111 | # SageMath parsed files 112 | *.sage.py 113 | 114 | # Environments 115 | .env 116 | .venv 117 | env/ 118 | venv/ 119 | ENV/ 120 | env.bak/ 121 | venv.bak/ 122 | 123 | # Spyder project settings 124 | .spyderproject 125 | .spyproject 126 | 127 | # Rope project settings 128 | .ropeproject 129 | 130 | # mkdocs documentation 131 | /site 132 | 133 | # mypy 134 | .mypy_cache/ 135 | .dmypy.json 136 | dmypy.json 137 | 138 | # Pyre type checker 139 | .pyre/ 140 | 141 | # pytype static type analyzer 142 | .pytype/ 143 | 144 | # Cython debug symbols 145 | cython_debug/ 146 | 147 | # PyCharm 148 | # JetBrains specific template is maintainted in a separate JetBrains.gitignore that can 149 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 150 | # and can be added to the global gitignore or merged into this file. For a more nuclear 151 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
152 | #.idea/ 153 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule ".github"] 2 | path = .github 3 | url = https://github.com/twilwa/.github 4 | -------------------------------------------------------------------------------- /.vscode/extensions.json: -------------------------------------------------------------------------------- 1 | { 2 | "recommendations": [ 3 | "googlecloudtools.cloudcode" 4 | ] 5 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 tonic 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: GeneralistAutogenAgent 3 | emoji: 🌍 4 | colorFrom: gray 5 | colorTo: red 6 | sdk: gradio 7 | sdk_version: 4.0.2 8 | app_file: app.py 9 | pinned: false 10 | license: mit 11 | --- 12 | 13 | ## Use and Install on the Command Line 14 | 15 | ```bash 16 | git clone https://github.com/Josephrp/LablabAutogen.git 17 | ``` 18 | 19 | ```bash 20 | cd LablabAutogen 21 | ``` 22 | 23 | ```bash 24 | nano app.py 25 | ``` 26 | 27 | edit line 27, ```"openai_api_key": OPENAI_API_KEY, # OpenAI API Key```, replacing `OPENAI_API_KEY` with your key (or set `OPENAI_API_KEY` in a `.env` file; see `.env.example`) 28 | 29 | then press: 30 | 31 | ```nano 32 | control + x 33 | ``` 34 | 35 | Write : 36 | 37 | ```nano 38 | Y 39 | ``` 40 | 41 | to save then type : 42 | 43 | ```bash 44 | pip install -r requirements.txt 45 | ``` 46 | 47 | and finally : 48 | 49 | ```bash 50 | python app.py 51 | ``` 52 | to run. 53 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | import os 3 | from pydantic import BaseModel, ValidationError 4 | from plugins.sk_bing_plugin import BingPlugin 5 | from plugins.sk_web_pages_plugin import WebPagesPlugin 6 | from planning.autogen_planner import AutoGenPlanner 7 | from web_search_client import WebSearchClient 8 | from web_search_client.models import SafeSearch 9 | from azure.core.credentials import AzureKeyCredential 10 | from semantic_kernel.core_skills.text_skill import TextSkill 11 | from semantic_kernel.planning.basic_planner import BasicPlanner 12 | from dotenv import load_dotenv 13 | import semantic_kernel 14 | 15 | 16 | 17 | load_dotenv() 18 | 19 | OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") 20 | BING_API_KEY = os.getenv("BING_API_KEY") 21 | AZURE_API_KEY = os.getenv("AZURE_API_KEY") 22 | 23 | 24 | 25 | llm_config = { 
class AutoGenPlanner:
    """
    Semantic Kernel planner using Conversational Programming via AutoGen.

    Leverages OpenAI Function Calling and AutoGen agents to solve tasks using
    loaded Semantic Kernel plugins. Supports functions with a single string parameter.
    Tested with GPT 3.5 Turbo and GPT 4, primarily uses GPT 3.5 Turbo for performance.
    """

    # NOTE: this string is built once, when the class body runs (module import),
    # so the embedded date is the import date rather than the date of each chat.
    ASSISTANT_PERSONA = (
        "Only use provided functions. Do not ask the user for other actions. "
        "Use functions to find unavailable information. "
        f"Today's date: {datetime.date.today().strftime('%B %d, %Y')}. "
        "Reply TERMINATE when the task is done."
    )

    def __init__(
        self,
        kernel: "semantic_kernel.Kernel",
        llm_config: Optional[Dict] = None,
        builder_config_path: Optional[str] = None,
    ):
        """
        Store the kernel and configuration and validate the LLM settings.

        Args:
            kernel: Kernel whose registered skills are exposed to the agents.
            llm_config: Dict with a "type" of "openai" or "azure" plus the
                matching credentials (see ``validate_llm_config``).
            builder_config_path: Optional config path handed to AgentBuilder.
                When omitted no builder is created up front, so the planner can
                still be used for plain assistant/user agents.

        Raises:
            ValueError: If ``llm_config`` is missing or incomplete.
        """
        self.kernel = kernel
        self.llm_config = llm_config or {}
        self.builder_config_path = builder_config_path
        self.validate_llm_config()
        # The builder is optional: only create it when a config path was given.
        self.builder = self.create_builder() if builder_config_path else None

    def create_builder(self) -> "autogen.agentchat.contrib.agent_builder.AgentBuilder":
        """
        Create an instance of AgentBuilder.

        Raises:
            ValueError: If no builder config path was supplied.
        """
        if not self.builder_config_path:
            raise ValueError("Builder config path is required to create AgentBuilder.")
        # Imported here because a bare ``import autogen`` may not load the
        # ``contrib`` subpackage, which would make attribute access fail.
        from autogen.agentchat.contrib.agent_builder import AgentBuilder

        return AgentBuilder(
            config_path=self.builder_config_path,
            builder_model='gpt-4-1106-preview',
            agent_model='gpt-4-1106-preview',
        )

    def build_agents_for_task(self, task_description: str):
        """
        Build agents for a specific task using the AgentBuilder.

        Args:
            task_description (str): A description of the task for which agents are to be built.

        Returns:
            The ``(agent_list, agent_configs)`` pair returned by the builder, or
            ``None`` when building failed (the error is printed).

        Raises:
            ValueError: If no builder exists and no builder config path was supplied.
        """
        if self.builder is None:
            # Created on first use; a missing config path is a setup error, so
            # it is raised rather than swallowed by the handler below.
            self.builder = self.create_builder()
        try:
            agent_list, agent_configs = self.builder.build(
                task_description, self.__get_autogen_config(), coding=True
            )
        except Exception as e:
            # Best effort by design: report the failure and signal it with None.
            print(f"Error in building agents for task '{task_description}': {e}")
            return None
        print(f"Agents built successfully for task: '{task_description}'")
        return agent_list, agent_configs

    def create_assistant_agent(self, name: str, persona: str = ASSISTANT_PERSONA) -> "autogen.AssistantAgent":
        """Create an AutoGen assistant agent using ``persona`` as its system message."""
        return autogen.AssistantAgent(name=name, system_message=persona, llm_config=self.__get_autogen_config())

    def create_user_agent(
        self, name: str, max_auto_reply: Optional[int] = None, human_input: Optional[str] = "ALWAYS"
    ) -> "autogen.UserProxyAgent":
        """
        Create an AutoGen user proxy agent that can execute the kernel's functions.

        Args:
            name: Agent name.
            max_auto_reply: Passed as ``max_consecutive_auto_reply``.
            human_input: Passed as ``human_input_mode``.
        """
        return autogen.UserProxyAgent(
            name=name,
            human_input_mode=human_input,
            max_consecutive_auto_reply=max_auto_reply,
            function_map=self.__get_function_map(),
        )

    def validate_llm_config(self):
        """
        Check that ``self.llm_config`` holds everything its "type" requires.

        Raises:
            ValueError: If the type is not "openai"/"azure", or a required
                setting is missing or empty.
        """
        llm_type = self.llm_config.get("type")
        if llm_type == "openai":
            if not self.llm_config.get("openai_api_key"):
                raise ValueError("OpenAI API key is required for OpenAI LLM.")
        elif llm_type == "azure":
            required_keys = ("azure_api_key", "azure_deployment", "azure_endpoint")
            # Empty values are as unusable as missing keys, so reject both.
            if any(not self.llm_config.get(key) for key in required_keys):
                raise ValueError("Azure OpenAI API configuration is incomplete.")
        else:
            raise ValueError("LLM type not provided, must be 'openai' or 'azure'.")

    def update_llm_config(self, new_config: Dict):
        """
        Replace the LLM configuration after validating it.

        The previous configuration is restored if the new one is rejected.

        Raises:
            ValueError: If ``new_config`` is invalid.
        """
        previous = self.llm_config
        self.llm_config = new_config or {}
        try:
            self.validate_llm_config()
        except ValueError:
            self.llm_config = previous
            raise

    def load_semantic_kernel_plugins(self, plugins: List[str]):
        """
        Load Semantic Kernel plugins into the kernel.

        Each item is passed unchanged to ``kernel.import_skill``; failures are
        printed and do not stop the remaining plugins from loading.

        Args:
            plugins (List[str]): A list of plugin names to load.
        """
        for plugin in plugins:
            try:
                self.kernel.import_skill(plugin)
                print(f"Plugin '{plugin}' loaded successfully.")
            except Exception as e:
                print(f"Error loading plugin '{plugin}': {e}")

    def __iter_sk_functions(self):
        """Yield every function view registered in the kernel, native first, then semantic."""
        view = self.kernel.skills.get_functions_view()
        # Walk both dicts separately: merging them with {**a, **b} would drop
        # the native functions of any skill that also has semantic functions.
        for group in (view.native_functions, view.semantic_functions):
            for funcs in group.values():
                yield from funcs

    def __get_autogen_config(self) -> Dict:
        """
        Build the AutoGen ``llm_config`` dict for the configured provider.

        Raises:
            ValueError: If the configured type is neither "openai" nor "azure".
        """
        llm_type = self.llm_config["type"]
        if llm_type == "openai":
            config_list = [{"model": "gpt-3.5-turbo", "api_key": self.llm_config["openai_api_key"]}]
        elif llm_type == "azure":
            config_list = [{
                "model": self.llm_config["azure_deployment"],
                "api_type": "azure",
                "api_key": self.llm_config["azure_api_key"],
                "api_base": self.llm_config["azure_endpoint"],
                "api_version": "2023-08-01-preview",
            }]
        else:
            raise ValueError("LLM type not provided, must be 'openai' or 'azure'.")
        return {"functions": self.__get_function_definitions(), "config_list": config_list}

    def __get_function_definitions(self) -> List:
        """
        Describe the kernel functions in the function-calling schema.

        Only functions with exactly one parameter of type "string" are included.
        """
        functions = []
        for f in self.__iter_sk_functions():
            if len(f.parameters) != 1 or f.parameters[0].type_ != "string":
                continue
            param = f.parameters[0]
            functions.append({
                "name": f.name,
                "description": f.description,
                "parameters": {
                    "type": "object",
                    "properties": {param.name: {"description": param.description, "type": "string"}},
                    "required": [param.name],
                },
            })
        return functions

    def __get_function_map(self) -> Dict:
        """Map each kernel function name to the function object fetched from the kernel."""
        return {
            f.name: self.kernel.skills.get_function(f.skill_name, f.name)
            for f in self.__iter_sk_functions()
        }
class BingConnector(ConnectorBase):
    """
    A search engine connector that uses the Bing Search API to perform a web search.
    The connector can be used to read "answers" from Bing, when "snippets" are available,
    or simply to retrieve the URLs of the search results.
    """

    _api_key: str

    def __init__(self, api_key: str, logger: Optional[Logger] = None) -> None:
        """
        :param api_key: Bing Search API subscription key
        :param logger: optional logger; a NullLogger is used when omitted
        :raises ValueError: if ``api_key`` is empty
        """
        if not api_key:
            raise ValueError("Bing API key cannot be null. Please set environment variable BING_API_KEY.")

        self._api_key = api_key
        self._logger = logger if logger else NullLogger()

    async def search_url_async(self, query: str, num_results: str, offset: str) -> List[str]:
        """
        Returns the search results URLs of the query provided by Bing web search API.
        Returns `num_results` results and ignores the first `offset`.

        :param query: search query
        :param num_results: the number of search results to return
        :param offset: the number of search results to ignore
        :return: list of search results, empty when nothing was found
        """
        return await self.__search_field("url", query, num_results, offset)

    async def search_snippet_async(self, query: str, num_results: str, offset: str) -> List[str]:
        """
        Returns the search results Text Preview (aka snippet) of the query provided by Bing web search API.
        Returns `num_results` results and ignores the first `offset`.

        :param query: search query
        :param num_results: the number of search results to return
        :param offset: the number of search results to ignore
        :return: list of search results, empty when nothing was found
        """
        return await self.__search_field("snippet", query, num_results, offset)

    async def __search_field(self, field: str, query: str, num_results: str, offset: str) -> List[str]:
        """
        Runs the search and returns the value of `field` for every web page in the response.

        :param field: key to read from each web page entry ("url" or "snippet")
        :return: list of field values, empty when the response has no web pages
        """
        data = await self.__search(query, num_results, offset)
        if not data:
            return []

        # The response may carry no "webPages" entry (e.g. no results for the
        # query); treat that as an empty result instead of raising KeyError.
        pages = (data.get("webPages") or {}).get("value") or []
        self._logger.info(pages)
        result = [page[field] for page in pages]
        self._logger.info(result)
        return result

    async def __search(self, query: str, num_results: str, offset: str) -> Any:
        """
        Returns the search response of the query provided by pinging the Bing web search API.
        Returns the response content

        :param query: search query
        :param num_results: the number of search results to return; numeric strings and
            ints are both accepted, empty/None falls back to 1
        :param offset: the number of search results to ignore; numeric strings and ints
            are both accepted, empty/None falls back to 0
        :return: response content or None
        :raises ValueError: if the query is empty or a number is out of range or not numeric
        """
        if not query:
            raise ValueError("query cannot be 'None' or empty.")

        num_results = int(num_results) if num_results else 1
        offset = int(offset) if offset else 0

        if num_results <= 0:
            raise ValueError("num_results value must be greater than 0.")
        if num_results >= 50:
            raise ValueError("num_results value must be less than 50.")

        if offset < 0:
            raise ValueError("offset must be greater than or equal to 0.")

        # Single-argument log calls are kept on purpose: the logger may be a
        # NullLogger — NOTE(review): confirm it accepts %-style args before
        # switching to lazy formatting.
        self._logger.info(
            "Received request for bing web search with "
            f"params:\nquery: {query}\nnum_results: {num_results}\noffset: {offset}"
        )

        # Import the submodule explicitly; ``import urllib`` alone does not
        # guarantee that ``urllib.parse`` is available as an attribute.
        from urllib.parse import quote_plus

        _base_url = "https://api.bing.microsoft.com/v7.0/search"
        _request_url = f"{_base_url}?q={quote_plus(query)}&count={num_results}&offset={offset}"

        self._logger.info(f"Sending GET request to {_request_url}")

        headers = {"Ocp-Apim-Subscription-Key": self._api_key}

        async with aiohttp.ClientSession() as session:
            # raise_for_status=True turns 4xx/5xx answers into exceptions, so
            # only other non-200 statuses reach the None branch below.
            async with session.get(_request_url, headers=headers, raise_for_status=True) as response:
                if response.status == 200:
                    return await response.json()
                return None
class WebPagesPlugin:
    """
    Semantic Kernel plugin for working with web pages; it currently offers one
    function that downloads a page and returns its text.
    """

    @sk_function(
        description="Fetch the text content of a webpage. The return is a string containing all the text.",
        name="fetch_webpage",
        input_description="URL of the page to fetch.",
    )
    async def fetch_webpage(self, input: str) -> str:
        """
        Download the page at the given URL and return its text.

        The listed non-text elements are dropped before the text is extracted,
        and runs of three or more line breaks are collapsed into one blank line.
        Raises ValueError when the URL is empty; HTTP error statuses raise
        because the request is made with ``raise_for_status=True``.
        """
        if not input:
            raise ValueError("url cannot be `None` or empty")

        unwanted_tags = ["script", "style", "iframe", "img", "video", "audio"]

        async with aiohttp.ClientSession() as http:
            async with http.get(input, raise_for_status=True) as reply:
                markup = await reply.text()
                document = BeautifulSoup(markup, features="html.parser")

                # detach elements that carry no readable text
                for node in document.find_all(unwanted_tags):
                    node.extract()

                # squeeze long runs of line breaks down to a single blank line
                plain = document.get_text()
                return re.sub(r"[\r\n][\r\n]{2,}", "\n\n", plain)
microsoft-bing-imagesearch 16 | microsoft-bing-newssearch 17 | microsoft-bing-spellcheck 18 | microsoft-bing-entitysearch 19 | microsoft-bing-autosuggest 20 | microsoft-bing-customimagesearch 21 | microsoft-bing-customwebsearch 22 | msrest 23 | msrestazure 24 | --------------------------------------------------------------------------------