├── .gitignore ├── Dockerfile ├── md_tests.py ├── simplest_summary.py ├── summaries ├── langchain.md ├── summarize_article.py ├── summary.md └── summary1.md ├── summarize.py ├── test_scrape.py └── try_save_md.py /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | OAI_CONFIG_LIST 3 | .cache/ -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Use an official Python runtime as a parent image 2 | FROM python:3.8-slim 3 | 4 | # Set the working directory in the container 5 | WORKDIR /app 6 | 7 | # Copy the current directory contents into the container at /app 8 | COPY . /app 9 | 10 | # # Explicitly copy the config 11 | # COPY OAI_CONFIG_LIST /app/OAI_CONFIG_LIST 12 | 13 | 14 | # Install the required packages 15 | RUN pip install --trusted-host pypi.python.org pyautogen beautifulsoup4 docker 16 | # Uncomment the line below if you need blendsearch 17 | # RUN pip install --trusted-host pypi.python.org "pyautogen[blendsearch]" 18 | 19 | # Specify the command to run on container start 20 | CMD ["python", "summarize.py"] 21 | -------------------------------------------------------------------------------- /md_tests.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import requests 4 | 5 | from bs4 import BeautifulSoup 6 | 7 | import autogen 8 | from autogen import config_list_from_json 9 | 10 | from langchain.chat_models import ChatOpenAI 11 | from langchain.prompts import ChatPromptTemplate 12 | 13 | # from langchain.text_splitter import RecursiveCharacterTextSplitter 14 | 15 | 16 | import openai 17 | 18 | from dotenv import load_dotenv 19 | 20 | load_dotenv() 21 | 22 | # Get api key 23 | config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST") 24 | llm_config = {"config_list": config_list} 25 | openai.api_key = 
os.getenv("OPENAI_API_KEY") 26 | SERPER_API_KEY = os.getenv("SERPER_API_KEY") 27 | BROWSERLESS_API_KEY = os.getenv("BROWSERLESS_API_KEY") 28 | 29 | # user_proxy = autogen.UserProxyAgent( 30 | # name="User_proxy", 31 | # system_message="A human admin who will provide the resources to summarize and save the summary.", 32 | # code_execution_config={"work_dir": "summaries"}, 33 | # human_input_mode="ALWAYS", 34 | # ) 35 | 36 | # writer = autogen.AssistantAgent( 37 | # name="Writer", 38 | # llm_config=llm_config, 39 | # ) 40 | 41 | 42 | def extract_article(page_content): 43 | extractor = autogen.AssistantAgent( 44 | name="Extractor", 45 | llm_config=llm_config, 46 | system_message="An AI assistant to extract the main content from the provided webpage.", 47 | ) 48 | 49 | proxy = autogen.UserProxyAgent(name="proxy", llm_config=llm_config) 50 | 51 | proxy.initiate_chat( 52 | extractor, 53 | message=f"""You will be given some Page Content between triple backticks. It's a result of function `soup(get_text)`. Your role is to recognize where the actual content (article, blog post, etc.) begins and ends and return it to me. I want only the article text. 
def scrape(url: str):
    """Fetch a web page via the Browserless API, extract the main article
    text with the Extractor agent, and return a markdown summary of it.

    Args:
        url: Address of the page to scrape.

    Returns:
        The summary produced by ``summarize``, or an error string when the
        HTTP request fails.
    """
    print("Scraping website...")

    # Browserless renders the page (including client-side JS) and returns HTML.
    browserless_url = (
        f"https://chrome.browserless.io/content?token={BROWSERLESS_API_KEY}"
    )

    headers = {
        "Cache-Control": "no-cache",
        "Content-Type": "application/json",
    }

    # Convert the request payload to a JSON string.
    data_json = json.dumps({"url": url})

    # Send the POST request.
    response = requests.post(
        browserless_url,
        headers=headers,
        data=data_json,
    )

    # Bug fix: previously `summarize(article)` ran unconditionally, so a
    # failed request raised UnboundLocalError on `article`. Summarize only
    # on success; report the failure otherwise.
    if response.status_code != 200:
        message = f"HTTP request failed with status code {response.status_code}"
        print(message)
        return message

    soup = BeautifulSoup(response.content, "html.parser")
    text = soup.get_text()
    print("CONTENTTTTTT:", text)
    article = extract_article(text)
    return summarize(article)
prompt.format_messages(text=content) 122 | 123 | # summarize content 124 | summary = llm(formatted_prompt) 125 | 126 | return summary 127 | 128 | 129 | # def research(query): 130 | llm_config_researcher = { 131 | "functions": [ 132 | # { 133 | # "name": "search", 134 | # "description": "google search for relevant information", 135 | # "parameters": { 136 | # "type": "object", 137 | # "properties": { 138 | # "query": { 139 | # "type": "string", 140 | # "description": "Google search query", 141 | # } 142 | # }, 143 | # "required": ["query"], 144 | # }, 145 | # }, 146 | { 147 | "name": "scrape", 148 | "description": "Scraping website content based on url", 149 | "parameters": { 150 | "type": "object", 151 | "properties": { 152 | "url": { 153 | "type": "string", 154 | "description": "Website url to scrape", 155 | } 156 | }, 157 | "required": ["url"], 158 | }, 159 | }, 160 | # { 161 | # "name": "summarize", 162 | # "description": "Summarize a given text content", 163 | # "parameters": { 164 | # "type": "object", 165 | # "properties": { 166 | # "content": { 167 | # "type": "string", 168 | # "description": "Text content to summarize", 169 | # } 170 | # }, 171 | # "required": ["content"], 172 | # }, 173 | # }, 174 | ], 175 | "config_list": config_list, 176 | } 177 | 178 | # researcher = autogen.AssistantAgent( 179 | # name="researcher", 180 | # # system_message="Research about a given query, collect as many information as possible, and generate detailed research results with loads of technique details with all reference links attached; Add TERMINATE to the end of the research report;", 181 | # system_message="", 182 | # llm_config=llm_config_researcher, 183 | # ) 184 | 185 | writer = autogen.AssistantAgent( 186 | name="writer", 187 | system_message="You are a professional researcher. You specialize at summarizing content to get the essense of the provided article or blog. You will improve your summary based on the suggestions from your admin. 
Use scrape function to get the content based on URL. After 2 rounds of content iteration, add TERMINATE to the end of the message", 188 | llm_config=llm_config_researcher, 189 | ) 190 | 191 | # reviewer = autogen.AssistantAgent( 192 | # name="reviewer", 193 | # system_message="You are a world class hash tech blog content critic, you will review & critic the written summary and provide feedback to writer.After 2 rounds of content iteration, add TERMINATE to the end of the message", 194 | # llm_config={"config_list": config_list}, 195 | # ) 196 | 197 | user_proxy = autogen.UserProxyAgent( 198 | name="User_proxy", 199 | code_execution_config={"work_dir": "summaries"}, 200 | is_termination_msg=lambda x: x.get("content", "") 201 | and x.get("content", "").rstrip().endswith("TERMINATE"), 202 | human_input_mode="TERMINATE", 203 | function_map={ 204 | "scrape": scrape, 205 | }, 206 | ) 207 | 208 | user_proxy.initiate_chat( 209 | writer, 210 | message="""Summarize the article: https://towardsdatascience.com/exploring-gemba-a-new-llm-based-metric-for-translation-quality-assessment-3a3383de6d1f""", 211 | ) 212 | # message="""Summarize the article: https://www.pinecone.io/learn/series/langchain/langchain-agents/""", 213 | # message="""Summarize the article: https://medium.com/codingthesmartway-com-blog/mastering-langchain-agents-your-personal-journey-begins-here-e4dba27c8f56""", 214 | 215 | 216 | # user_proxy.initiate_chat(writer, message=query) 217 | 218 | 219 | # user_proxy = autogen.UserProxyAgent( 220 | # name="User_proxy", 221 | # human_input_mode="TERMINATE", 222 | # function_map={ 223 | # # "write_content": write_content, 224 | # "research": research, 225 | # }, 226 | # ) 227 | 228 | # user_proxy.initiate_chat( 229 | # writing_assistant, message="write a blog about autogen multi AI agent framework" 230 | # ) 231 | 232 | 233 | # # Define research function 234 | # def search(query): 235 | # url = "https://google.serper.dev/search" 236 | 237 | # payload = json.dumps({"q": 
query}) 238 | # headers = { 239 | # "X-API-KEY": SERPER_API_KEY, 240 | # "Content-Type": "application/json", 241 | # } 242 | 243 | # response = requests.request("POST", url, headers=headers, data=payload) 244 | 245 | # return response.json() 246 | 247 | 248 | ### --------------------- ### 249 | #
250 | # TL;DR: We demonstrate how to use autogen for local LLM application. As an example, we will initiate an endpoint using FastChat and perform inference on ChatGLMv2-6b. 251 | 252 | # Preparations​ 253 | # Clone FastChat​ 254 | # FastChat provides OpenAI-compatible APIs for its supported models, so you can use FastChat as a local drop-in replacement for OpenAI APIs. However, its code needs minor modification in order to function properly. 255 | 256 | # git clone https://github.com/lm-sys/FastChat.git 257 | # cd FastChat 258 | # Download checkpoint​ 259 | # ChatGLM-6B is an open bilingual language model based on General Language Model (GLM) framework, with 6.2 billion parameters. ChatGLM2-6B is its second-generation version. 260 | 261 | # Before downloading from HuggingFace Hub, you need to have Git LFS installed. 262 | 263 | # git clone https://huggingface.co/THUDM/chatglm2-6b 264 | # Initiate server​ 265 | # First, launch the controller 266 | 267 | # python -m fastchat.serve.controller 268 | # Then, launch the model worker(s) 269 | 270 | # python -m fastchat.serve.model_worker --model-path chatglm2-6b 271 | # Finally, launch the RESTful API server 272 | 273 | # python -m fastchat.serve.openai_api_server --host localhost --port 8000 274 | # Normally this will work. However, if you encounter error like this, commenting out all the lines containing finish_reason in fastchat/protocol/api_protocal.py and fastchat/protocol/openai_api_protocol.py will fix the problem. 
The modified code looks like: 275 | 276 | # class CompletionResponseChoice(BaseModel): 277 | # index: int 278 | # text: str 279 | # logprobs: Optional[int] = None 280 | # # finish_reason: Optional[Literal["stop", "length"]] 281 | 282 | # class CompletionResponseStreamChoice(BaseModel): 283 | # index: int 284 | # text: str 285 | # logprobs: Optional[float] = None 286 | # # finish_reason: Optional[Literal["stop", "length"]] = None 287 | # Interact with model using oai.Completion​ 288 | # Now the models can be directly accessed through openai-python library as well as autogen.oai.Completion and autogen.oai.ChatCompletion. 289 | 290 | # from autogen import oai 291 | 292 | # # create a text completion request 293 | # response = oai.Completion.create( 294 | # config_list=[ 295 | # { 296 | # "model": "chatglm2-6b", 297 | # "api_base": "http://localhost:8000/v1", 298 | # "api_type": "open_ai", 299 | # "api_key": "NULL", # just a placeholder 300 | # } 301 | # ], 302 | # prompt="Hi", 303 | # ) 304 | # print(response) 305 | 306 | # # create a chat completion request 307 | # response = oai.ChatCompletion.create( 308 | # config_list=[ 309 | # { 310 | # "model": "chatglm2-6b", 311 | # "api_base": "http://localhost:8000/v1", 312 | # "api_type": "open_ai", 313 | # "api_key": "NULL", 314 | # } 315 | # ], 316 | # messages=[{"role": "user", "content": "Hi"}] 317 | # ) 318 | # print(response) 319 | # If you would like to switch to different models, download their checkpoints and specify model path when launching model worker(s). 
320 | 321 | # interacting with multiple local LLMs​ 322 | # If you would like to interact with multiple LLMs on your local machine, replace the model_worker step above with a multi model variant: 323 | 324 | # python -m fastchat.serve.multi_model_worker \ 325 | # --model-path lmsys/vicuna-7b-v1.3 \ 326 | # --model-names vicuna-7b-v1.3 \ 327 | # --model-path chatglm2-6b \ 328 | # --model-names chatglm2-6b 329 | # The inference code would be: 330 | 331 | # from autogen import oai 332 | 333 | # # create a chat completion request 334 | # response = oai.ChatCompletion.create( 335 | # config_list=[ 336 | # { 337 | # "model": "chatglm2-6b", 338 | # "api_base": "http://localhost:8000/v1", 339 | # "api_type": "open_ai", 340 | # "api_key": "NULL", 341 | # }, 342 | # { 343 | # "model": "vicuna-7b-v1.3", 344 | # "api_base": "http://localhost:8000/v1", 345 | # "api_type": "open_ai", 346 | # "api_key": "NULL", 347 | # } 348 | # ], 349 | # messages=[{"role": "user", "content": "Hi"}] 350 | # ) 351 | # print(response) 352 | #
353 | -------------------------------------------------------------------------------- /simplest_summary.py: -------------------------------------------------------------------------------- 1 | import autogen 2 | from autogen import config_list_from_json 3 | 4 | 5 | config_list = config_list_from_json( 6 | env_or_file="OAI_CONFIG_LIST", 7 | ) 8 | llm_config = {"config_list": config_list} 9 | 10 | 11 | writer = autogen.AssistantAgent( 12 | name="Writer", 13 | system_message=""" 14 | You are a professional content researcher.\ 15 | You specialize at summarizing content of the provided article or blog.\ 16 | Your summaries are detailed and well structured.\ 17 | Add TERMINATE to the end of the message""", 18 | llm_config=llm_config, 19 | ) 20 | 21 | user_proxy = autogen.UserProxyAgent( 22 | name="User_proxy", 23 | is_termination_msg=lambda x: x.get("content", "") 24 | and x.get("content", "").rstrip().endswith("TERMINATE"), 25 | human_input_mode="TERMINATE", 26 | ) 27 | 28 | 29 | # Start the conversation 30 | user_proxy.initiate_chat( 31 | writer, 32 | message="""Summarize the article: https://blog.stackademic.com/using-chatgpt-for-web-scraping-a-practical-guide-673fa2bbfac1""", 33 | ) 34 | -------------------------------------------------------------------------------- /summaries/langchain.md: -------------------------------------------------------------------------------- 1 | 2 | The article "LangChain 101: Part 1. Building Simple Q&A App" by Ivan Reznikov presents an introductory guide to LangChain, a powerful framework used for generating text-based applications. The framework's primary focus is language and deals with tasks such as creating text, answering questions, translating languages, among other text-related tasks. 3 | 4 | LangChain uses six basic components: 5 | 1. **Models**: Large Language Models (LLMs) used for generating text, answering questions, translating languages, and more. 6 | 2. **Prompts**: Text components used to guide the output of LLMs. 
They can be simple or complex. 7 | 3. **Chains**: Sequences of instructions performed by the LangChain framework. 8 | 4. **Memory**: Used to store data the LLM can access later. 9 | 5. **Indexes**: Unique data structures for storing information about data content. 10 | 6. **Agents and Tools**: Agents perform specific tasks like text generation or language translation while tools assist with other functions like data manipulation. 11 | 12 | The article also provides a practical example of using LangChain to build a Q&A system that answers questions about official holidays in the United Arab Emirates. It demonstrates how to use the different elements i.e., models, chains, prompts, agents, memory, and indexes, in the construction of the system. The system uses a unique prompt based on the query at hand and builds a chain that combines LLMs and memory to process the question and provide an answer that makes use of retained chat history. 13 | 14 | In the upcoming LangChain 101 course, readers will learn how to use their computers' models, data, and execute more complex tasks. The course will also teach them how to use memory to store information and build real-world applications with LangChain. 15 | -------------------------------------------------------------------------------- /summaries/summarize_article.py: -------------------------------------------------------------------------------- 1 | with open( 2 | "/home/kris/Documents/SmartNotes/SecondBrain/Evergreen Notes/langchain.md", "w" 3 | ) as file: 4 | file.write( 5 | """ 6 | ## Main Benefits of LangChain: 7 | * It simplifies the use of Large Language Models (LLMs) for specific tasks. 8 | * Allows combining the power of LLMs with other programming techniques. 9 | * Provides an ability to control and influence LLM's output via prompts. 10 | * Memory feature allowing LLM to learn from previous interactions and build a knowledge base. 
11 | * Offers unique components like Chains, which are sequences of instructions executed to perform a task. 12 | * Facilitates the construction of unique and complex chains of instructions that can perform sophisticated operations. 13 | 14 | ## Main Modules of LangChain: 15 | * **Models**: Large language models trained on massive datasets of text and code. 16 | * **Prompts**: Pieces of text that guide the LLM to generate the desired output. 17 | * **Chains**: Sequences of instructions the LangChain framework executes to perform a task. 18 | * **Memory**: A method of storing data that the LLM can access later. 19 | * **Indexes**: Unique data structures to store information about the data content. 20 | * **Agents and Tools**: Agents are reusable components that can perform specific tasks, while Tools are function libraries to aid in developing various agents. 21 | """ 22 | ) 23 | -------------------------------------------------------------------------------- /summaries/summary.md: -------------------------------------------------------------------------------- 1 | # Your article text here 2 | 3 | ## Key Takeaways: 4 | -------------------------------------------------------------------------------- /summaries/summary1.md: -------------------------------------------------------------------------------- 1 | TL;DR: This article provides a demonstration of how to use autogen for local LLM application using FastChat and ChatGLMv2-6b. The article covers preparations, cloning FastChat and downloading the checkpoint for ChatGLM-6B. It also includes instructions for initiating the server and interacting with the model using oai.Completion. Lastly, it explains how to interact with multiple local LLMs on your machine. 
def scrape(url: str):
    """Fetch the rendered HTML for *url* through the Browserless API and
    return the page's visible text.

    Args:
        url: Address of the page to scrape.

    Returns:
        The page text on success, or an error string when the HTTP
        request fails.
    """
    print("Using the scrape function...")

    # Browserless renders the page (including client-side JS) and returns HTML.
    browserless_url = (
        f"https://chrome.browserless.io/content?token={BROWSERLESS_API_KEY}"
    )

    headers = {
        "Cache-Control": "no-cache",
        "Content-Type": "application/json",
    }

    # Convert the request payload to a JSON string.
    data_json = json.dumps({"url": url})

    # Send the POST request.
    response = requests.post(
        browserless_url,
        headers=headers,
        data=data_json,
    )

    # Bug fix: `content` used to be returned unconditionally, raising
    # UnboundLocalError whenever the request failed. Return a diagnostic
    # string instead so the calling agent sees what went wrong.
    if response.status_code != 200:
        message = f"HTTP request failed with status code {response.status_code}"
        print(message)
        return message

    soup = BeautifulSoup(response.content, "html.parser")
    content = soup.get_text()
    # print("Article text:", content)
    return content
def scrape(url: str):
    """Scrape a website through the Browserless content API and return its
    visible text.

    Args:
        url: The url of the website to be scraped.

    Returns:
        The extracted page text on success, ``None`` if the HTTP request
        failed.
    """
    # NOTE(review): this scratch file has no imports; `json`, `requests`,
    # `BeautifulSoup` and `BROWSERLESS_API_KEY` must be defined at module
    # level before this function can run standalone — confirm before use.
    print("Scraping website...")

    # Browserless renders the page (including client-side JS) and returns HTML.
    browserless_url = (
        f"https://chrome.browserless.io/content?token={BROWSERLESS_API_KEY}"
    )

    headers = {
        "Cache-Control": "no-cache",
        "Content-Type": "application/json",
    }

    # Convert the request payload to a JSON string.
    data_json = json.dumps({"url": url})

    # Send the POST request.
    response = requests.post(
        browserless_url,
        headers=headers,
        data=data_json,
    )

    if response.status_code != 200:
        print(f"HTTP request failed with status code {response.status_code}")
        return None

    soup = BeautifulSoup(response.content, "html.parser")
    text = soup.get_text()
    print("CONTENTTTTTT:", text)
    # Bug fix: the scraped text was printed but never returned, so the
    # function always evaluated to None even on success.
    return text
def save_summary(content: str, filename: str):
    """Save a markdown summary into the Second Brain vault.

    Args:
        content: The summary text (markdown format) to write.
        filename: Name of the file to create inside the vault directory.
    """
    obs_dir = (
        "/home/kris/Documents/SmartNotes/SecondBrain/AutoGen/"  # path to Second Brain
    )
    # Bug fix: the `filename` argument was previously ignored and a fixed
    # name was written instead, so every summary overwrote the same file.
    with open(f"{obs_dir}{filename}", "w") as file:
        file.write(content)
autogen.UserProxyAgent( 126 | name="User_proxy", 127 | is_termination_msg=lambda x: x.get("content", "") 128 | and x.get("content", "").rstrip().endswith("TERMINATE"), 129 | human_input_mode="TERMINATE", 130 | function_map={ 131 | "scrape": scrape, 132 | "save_summary": save_summary, 133 | }, 134 | ) 135 | 136 | 137 | # Start the conversation 138 | user_proxy.initiate_chat( 139 | writer, 140 | message="""Summarize the article and highlight key takeaways: https://python.plainenglish.io/python-7-mind-blowing-use-cases-of-the-lambda-function-3bb896f866af""", 141 | ) 142 | 143 | # message="""Summarize the article and highlight key takeaways: https://python.plainenglish.io/python-7-mind-blowing-use-cases-of-the-lambda-function-3bb896f866af""", 144 | # message="""Summarize the article: https://pub.towardsai.net/langchain-101-part-1-building-simple-q-a-app-90d9c4e815f3""", 145 | # message="""Summarize the article: https://towardsdatascience.com/introducing-keyllm-keyword-extraction-with-llms-39924b504813""", 146 | # message="""Summarize the article: https://towardsdatascience.com/exploring-gemba-a-new-llm-based-metric-for-translation-quality-assessment-3a3383de6d1f""", 147 | 148 | # /home/kris/Documents/SmartNotes/SecondBrain/AutoGen 149 | --------------------------------------------------------------------------------