├── .gitignore ├── Dockerfile ├── md_tests.py ├── simplest_summary.py ├── summaries ├── langchain.md ├── summarize_article.py ├── summary.md └── summary1.md ├── summarize.py ├── test_scrape.py └── try_save_md.py /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | OAI_CONFIG_LIST 3 | .cache/ -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Use an official Python runtime as a parent image 2 | FROM python:3.8-slim 3 | 4 | # Set the working directory in the container 5 | WORKDIR /app 6 | 7 | # Copy the current directory contents into the container at /app 8 | COPY . /app 9 | 10 | # # Explicitly copy the config 11 | # COPY OAI_CONFIG_LIST /app/OAI_CONFIG_LIST 12 | 13 | 14 | # Install the required packages 15 | RUN pip install --trusted-host pypi.python.org pyautogen beautifulsoup4 docker 16 | # Uncomment the line below if you need blendsearch 17 | # RUN pip install --trusted-host pypi.python.org "pyautogen[blendsearch]" 18 | 19 | # Specify the command to run on container start 20 | CMD ["python", "summarize.py"] 21 | -------------------------------------------------------------------------------- /md_tests.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import requests 4 | 5 | from bs4 import BeautifulSoup 6 | 7 | import autogen 8 | from autogen import config_list_from_json 9 | 10 | from langchain.chat_models import ChatOpenAI 11 | from langchain.prompts import ChatPromptTemplate 12 | 13 | # from langchain.text_splitter import RecursiveCharacterTextSplitter 14 | 15 | 16 | import openai 17 | 18 | from dotenv import load_dotenv 19 | 20 | load_dotenv() 21 | 22 | # Get api key 23 | config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST") 24 | llm_config = {"config_list": config_list} 25 | openai.api_key = 
os.getenv("OPENAI_API_KEY") 26 | SERPER_API_KEY = os.getenv("SERPER_API_KEY") 27 | BROWSERLESS_API_KEY = os.getenv("BROWSERLESS_API_KEY") 28 | 29 | # user_proxy = autogen.UserProxyAgent( 30 | # name="User_proxy", 31 | # system_message="A human admin who will provide the resources to summarize and save the summary.", 32 | # code_execution_config={"work_dir": "summaries"}, 33 | # human_input_mode="ALWAYS", 34 | # ) 35 | 36 | # writer = autogen.AssistantAgent( 37 | # name="Writer", 38 | # llm_config=llm_config, 39 | # ) 40 | 41 | 42 | def extract_article(page_content): 43 | extractor = autogen.AssistantAgent( 44 | name="Extractor", 45 | llm_config=llm_config, 46 | system_message="An AI assistant to extract the main content from the provided webpage.", 47 | ) 48 | 49 | proxy = autogen.UserProxyAgent(name="proxy", llm_config=llm_config) 50 | 51 | proxy.initiate_chat( 52 | extractor, 53 | message=f"""You will be given some Page Content between triple backticks. It's a result of function `soup(get_text)`. Your role is to recognize where the actual content (article, blog post, etc.) begins and ends and return it to me. I want only the article text. 
def scrape(url: str):
    """Fetch a web page via the Browserless API, extract the main article
    text with the Extractor agent, and return a markdown summary of it.

    Args:
        url: Address of the page to scrape.

    Returns:
        The summary produced by ``summarize``, or an error string when the
        HTTP request fails.
    """
    print("Scraping website...")

    # Browserless renders the page (including client-side JS) and returns HTML.
    browserless_url = (
        f"https://chrome.browserless.io/content?token={BROWSERLESS_API_KEY}"
    )

    headers = {
        "Cache-Control": "no-cache",
        "Content-Type": "application/json",
    }

    # Convert the request payload to a JSON string.
    data_json = json.dumps({"url": url})

    # Send the POST request.
    response = requests.post(
        browserless_url,
        headers=headers,
        data=data_json,
    )

    # Bug fix: previously `summarize(article)` ran unconditionally, so a
    # failed request raised UnboundLocalError on `article`. Summarize only
    # on success; report the failure otherwise.
    if response.status_code != 200:
        message = f"HTTP request failed with status code {response.status_code}"
        print(message)
        return message

    soup = BeautifulSoup(response.content, "html.parser")
    text = soup.get_text()
    print("CONTENTTTTTT:", text)
    article = extract_article(text)
    return summarize(article)
prompt.format_messages(text=content) 122 | 123 | # summarize content 124 | summary = llm(formatted_prompt) 125 | 126 | return summary 127 | 128 | 129 | # def research(query): 130 | llm_config_researcher = { 131 | "functions": [ 132 | # { 133 | # "name": "search", 134 | # "description": "google search for relevant information", 135 | # "parameters": { 136 | # "type": "object", 137 | # "properties": { 138 | # "query": { 139 | # "type": "string", 140 | # "description": "Google search query", 141 | # } 142 | # }, 143 | # "required": ["query"], 144 | # }, 145 | # }, 146 | { 147 | "name": "scrape", 148 | "description": "Scraping website content based on url", 149 | "parameters": { 150 | "type": "object", 151 | "properties": { 152 | "url": { 153 | "type": "string", 154 | "description": "Website url to scrape", 155 | } 156 | }, 157 | "required": ["url"], 158 | }, 159 | }, 160 | # { 161 | # "name": "summarize", 162 | # "description": "Summarize a given text content", 163 | # "parameters": { 164 | # "type": "object", 165 | # "properties": { 166 | # "content": { 167 | # "type": "string", 168 | # "description": "Text content to summarize", 169 | # } 170 | # }, 171 | # "required": ["content"], 172 | # }, 173 | # }, 174 | ], 175 | "config_list": config_list, 176 | } 177 | 178 | # researcher = autogen.AssistantAgent( 179 | # name="researcher", 180 | # # system_message="Research about a given query, collect as many information as possible, and generate detailed research results with loads of technique details with all reference links attached; Add TERMINATE to the end of the research report;", 181 | # system_message="", 182 | # llm_config=llm_config_researcher, 183 | # ) 184 | 185 | writer = autogen.AssistantAgent( 186 | name="writer", 187 | system_message="You are a professional researcher. You specialize at summarizing content to get the essense of the provided article or blog. You will improve your summary based on the suggestions from your admin. 
Use scrape function to get the content based on URL. After 2 rounds of content iteration, add TERMINATE to the end of the message", 188 | llm_config=llm_config_researcher, 189 | ) 190 | 191 | # reviewer = autogen.AssistantAgent( 192 | # name="reviewer", 193 | # system_message="You are a world class hash tech blog content critic, you will review & critic the written summary and provide feedback to writer.After 2 rounds of content iteration, add TERMINATE to the end of the message", 194 | # llm_config={"config_list": config_list}, 195 | # ) 196 | 197 | user_proxy = autogen.UserProxyAgent( 198 | name="User_proxy", 199 | code_execution_config={"work_dir": "summaries"}, 200 | is_termination_msg=lambda x: x.get("content", "") 201 | and x.get("content", "").rstrip().endswith("TERMINATE"), 202 | human_input_mode="TERMINATE", 203 | function_map={ 204 | "scrape": scrape, 205 | }, 206 | ) 207 | 208 | user_proxy.initiate_chat( 209 | writer, 210 | message="""Summarize the article: https://towardsdatascience.com/exploring-gemba-a-new-llm-based-metric-for-translation-quality-assessment-3a3383de6d1f""", 211 | ) 212 | # message="""Summarize the article: https://www.pinecone.io/learn/series/langchain/langchain-agents/""", 213 | # message="""Summarize the article: https://medium.com/codingthesmartway-com-blog/mastering-langchain-agents-your-personal-journey-begins-here-e4dba27c8f56""", 214 | 215 | 216 | # user_proxy.initiate_chat(writer, message=query) 217 | 218 | 219 | # user_proxy = autogen.UserProxyAgent( 220 | # name="User_proxy", 221 | # human_input_mode="TERMINATE", 222 | # function_map={ 223 | # # "write_content": write_content, 224 | # "research": research, 225 | # }, 226 | # ) 227 | 228 | # user_proxy.initiate_chat( 229 | # writing_assistant, message="write a blog about autogen multi AI agent framework" 230 | # ) 231 | 232 | 233 | # # Define research function 234 | # def search(query): 235 | # url = "https://google.serper.dev/search" 236 | 237 | # payload = json.dumps({"q": 
query}) 238 | # headers = { 239 | # "X-API-KEY": SERPER_API_KEY, 240 | # "Content-Type": "application/json", 241 | # } 242 | 243 | # response = requests.request("POST", url, headers=headers, data=payload) 244 | 245 | # return response.json() 246 | 247 | 248 | ### --------------------- ### 249 | #
250 | # TL;DR: We demonstrate how to use autogen for local LLM application. As an example, we will initiate an endpoint using FastChat and perform inference on ChatGLMv2-6b. 251 | 252 | # Preparations​ 253 | # Clone FastChat​ 254 | # FastChat provides OpenAI-compatible APIs for its supported models, so you can use FastChat as a local drop-in replacement for OpenAI APIs. However, its code needs minor modification in order to function properly. 255 | 256 | # git clone https://github.com/lm-sys/FastChat.git 257 | # cd FastChat 258 | # Download checkpoint​ 259 | # ChatGLM-6B is an open bilingual language model based on General Language Model (GLM) framework, with 6.2 billion parameters. ChatGLM2-6B is its second-generation version. 260 | 261 | # Before downloading from HuggingFace Hub, you need to have Git LFS installed. 262 | 263 | # git clone https://huggingface.co/THUDM/chatglm2-6b 264 | # Initiate server​ 265 | # First, launch the controller 266 | 267 | # python -m fastchat.serve.controller 268 | # Then, launch the model worker(s) 269 | 270 | # python -m fastchat.serve.model_worker --model-path chatglm2-6b 271 | # Finally, launch the RESTful API server 272 | 273 | # python -m fastchat.serve.openai_api_server --host localhost --port 8000 274 | # Normally this will work. However, if you encounter error like this, commenting out all the lines containing finish_reason in fastchat/protocol/api_protocal.py and fastchat/protocol/openai_api_protocol.py will fix the problem. 
The modified code looks like: 275 | 276 | # class CompletionResponseChoice(BaseModel): 277 | # index: int 278 | # text: str 279 | # logprobs: Optional[int] = None 280 | # # finish_reason: Optional[Literal["stop", "length"]] 281 | 282 | # class CompletionResponseStreamChoice(BaseModel): 283 | # index: int 284 | # text: str 285 | # logprobs: Optional[float] = None 286 | # # finish_reason: Optional[Literal["stop", "length"]] = None 287 | # Interact with model using oai.Completion​ 288 | # Now the models can be directly accessed through openai-python library as well as autogen.oai.Completion and autogen.oai.ChatCompletion. 289 | 290 | # from autogen import oai 291 | 292 | # # create a text completion request 293 | # response = oai.Completion.create( 294 | # config_list=[ 295 | # { 296 | # "model": "chatglm2-6b", 297 | # "api_base": "http://localhost:8000/v1", 298 | # "api_type": "open_ai", 299 | # "api_key": "NULL", # just a placeholder 300 | # } 301 | # ], 302 | # prompt="Hi", 303 | # ) 304 | # print(response) 305 | 306 | # # create a chat completion request 307 | # response = oai.ChatCompletion.create( 308 | # config_list=[ 309 | # { 310 | # "model": "chatglm2-6b", 311 | # "api_base": "http://localhost:8000/v1", 312 | # "api_type": "open_ai", 313 | # "api_key": "NULL", 314 | # } 315 | # ], 316 | # messages=[{"role": "user", "content": "Hi"}] 317 | # ) 318 | # print(response) 319 | # If you would like to switch to different models, download their checkpoints and specify model path when launching model worker(s). 
320 | 321 | # interacting with multiple local LLMs​ 322 | # If you would like to interact with multiple LLMs on your local machine, replace the model_worker step above with a multi model variant: 323 | 324 | # python -m fastchat.serve.multi_model_worker \ 325 | # --model-path lmsys/vicuna-7b-v1.3 \ 326 | # --model-names vicuna-7b-v1.3 \ 327 | # --model-path chatglm2-6b \ 328 | # --model-names chatglm2-6b 329 | # The inference code would be: 330 | 331 | # from autogen import oai 332 | 333 | # # create a chat completion request 334 | # response = oai.ChatCompletion.create( 335 | # config_list=[ 336 | # { 337 | # "model": "chatglm2-6b", 338 | # "api_base": "http://localhost:8000/v1", 339 | # "api_type": "open_ai", 340 | # "api_key": "NULL", 341 | # }, 342 | # { 343 | # "model": "vicuna-7b-v1.3", 344 | # "api_base": "http://localhost:8000/v1", 345 | # "api_type": "open_ai", 346 | # "api_key": "NULL", 347 | # } 348 | # ], 349 | # messages=[{"role": "user", "content": "Hi"}] 350 | # ) 351 | # print(response) 352 | #
353 | -------------------------------------------------------------------------------- /simplest_summary.py: -------------------------------------------------------------------------------- 1 | import autogen 2 | from autogen import config_list_from_json 3 | 4 | 5 | config_list = config_list_from_json( 6 | env_or_file="OAI_CONFIG_LIST", 7 | ) 8 | llm_config = {"config_list": config_list} 9 | 10 | 11 | writer = autogen.AssistantAgent( 12 | name="Writer", 13 | system_message=""" 14 | You are a professional content researcher.\ 15 | You specialize at summarizing content of the provided article or blog.\ 16 | Your summaries are detailed and well structured.\ 17 | Add TERMINATE to the end of the message""", 18 | llm_config=llm_config, 19 | ) 20 | 21 | user_proxy = autogen.UserProxyAgent( 22 | name="User_proxy", 23 | is_termination_msg=lambda x: x.get("content", "") 24 | and x.get("content", "").rstrip().endswith("TERMINATE"), 25 | human_input_mode="TERMINATE", 26 | ) 27 | 28 | 29 | # Start the conversation 30 | user_proxy.initiate_chat( 31 | writer, 32 | message="""Summarize the article: https://blog.stackademic.com/using-chatgpt-for-web-scraping-a-practical-guide-673fa2bbfac1""", 33 | ) 34 | -------------------------------------------------------------------------------- /summaries/langchain.md: -------------------------------------------------------------------------------- 1 | 2 | The article "LangChain 101: Part 1. Building Simple Q&A App" by Ivan Reznikov presents an introductory guide to LangChain, a powerful framework used for generating text-based applications. The framework's primary focus is language and deals with tasks such as creating text, answering questions, translating languages, among other text-related tasks. 3 | 4 | LangChain uses six basic components: 5 | 1. **Models**: Large Language Models (LLMs) used for generating text, answering questions, translating languages, and more. 6 | 2. **Prompts**: Text components used to guide the output of LLMs. 
They can be simple or complex. 7 | 3. **Chains**: Sequences of instructions performed by the LangChain framework. 8 | 4. **Memory**: Used to store data the LLM can access later. 9 | 5. **Indexes**: Unique data structures for storing information about data content. 10 | 6. **Agents and Tools**: Agents perform specific tasks like text generation or language translation while tools assist with other functions like data manipulation. 11 | 12 | The article also provides a practical example of using LangChain to build a Q&A system that answers questions about official holidays in the United Arab Emirates. It demonstrates how to use the different elements i.e., models, chains, prompts, agents, memory, and indexes, in the construction of the system. The system uses a unique prompt based on the query at hand and builds a chain that combines LLMs and memory to process the question and provide an answer that makes use of retained chat history. 13 | 14 | In the upcoming LangChain 101 course, readers will learn how to use their computers' models, data, and execute more complex tasks. The course will also teach them how to use memory to store information and build real-world applications with LangChain. 15 | -------------------------------------------------------------------------------- /summaries/summarize_article.py: -------------------------------------------------------------------------------- 1 | with open( 2 | "/home/kris/Documents/SmartNotes/SecondBrain/Evergreen Notes/langchain.md", "w" 3 | ) as file: 4 | file.write( 5 | """ 6 | ## Main Benefits of LangChain: 7 | * It simplifies the use of Large Language Models (LLMs) for specific tasks. 8 | * Allows combining the power of LLMs with other programming techniques. 9 | * Provides an ability to control and influence LLM's output via prompts. 10 | * Memory feature allowing LLM to learn from previous interactions and build a knowledge base. 
11 | * Offers unique components like Chains, which are sequences of instructions executed to perform a task. 12 | * Facilitates the construction of unique and complex chains of instructions that can perform sophisticated operations. 13 | 14 | ## Main Modules of LangChain: 15 | * **Models**: Large language models trained on massive datasets of text and code. 16 | * **Prompts**: Pieces of text that guide the LLM to generate the desired output. 17 | * **Chains**: Sequences of instructions the LangChain framework executes to perform a task. 18 | * **Memory**: A method of storing data that the LLM can access later. 19 | * **Indexes**: Unique data structures to store information about the data content. 20 | * **Agents and Tools**: Agents are reusable components that can perform specific tasks, while Tools are function libraries to aid in developing various agents. 21 | """ 22 | ) 23 | -------------------------------------------------------------------------------- /summaries/summary.md: -------------------------------------------------------------------------------- 1 | # Your article text here 2 | 3 | ## Key Takeaways: 4 | -------------------------------------------------------------------------------- /summaries/summary1.md: -------------------------------------------------------------------------------- 1 | TL;DR: This article provides a demonstration of how to use autogen for local LLM application using FastChat and ChatGLMv2-6b. The article covers preparations, cloning FastChat and downloading the checkpoint for ChatGLM-6B. It also includes instructions for initiating the server and interacting with the model using oai.Completion. Lastly, it explains how to interact with multiple local LLMs on your machine. 
def scrape(url: str):
    """Fetch the rendered HTML for *url* through the Browserless API and
    return the page's visible text.

    Args:
        url: Address of the page to scrape.

    Returns:
        The page text on success, or an error string when the HTTP
        request fails.
    """
    print("Using the scrape function...")

    # Browserless renders the page (including client-side JS) and returns HTML.
    browserless_url = (
        f"https://chrome.browserless.io/content?token={BROWSERLESS_API_KEY}"
    )

    headers = {
        "Cache-Control": "no-cache",
        "Content-Type": "application/json",
    }

    # Convert the request payload to a JSON string.
    data_json = json.dumps({"url": url})

    # Send the POST request.
    response = requests.post(
        browserless_url,
        headers=headers,
        data=data_json,
    )

    # Bug fix: `content` used to be returned unconditionally, raising
    # UnboundLocalError whenever the request failed. Return a diagnostic
    # string instead so the calling agent sees what went wrong.
    if response.status_code != 200:
        message = f"HTTP request failed with status code {response.status_code}"
        print(message)
        return message

    soup = BeautifulSoup(response.content, "html.parser")
    content = soup.get_text()
    # print("Article text:", content)
    return content
def scrape(url: str):
    """Scrape a website through the Browserless content API and return its
    visible text.

    Args:
        url: The url of the website to be scraped.

    Returns:
        The extracted page text on success, ``None`` if the HTTP request
        failed.
    """
    # NOTE(review): this scratch file has no imports; `json`, `requests`,
    # `BeautifulSoup` and `BROWSERLESS_API_KEY` must be defined at module
    # level before this function can run standalone — confirm before use.
    print("Scraping website...")

    # Browserless renders the page (including client-side JS) and returns HTML.
    browserless_url = (
        f"https://chrome.browserless.io/content?token={BROWSERLESS_API_KEY}"
    )

    headers = {
        "Cache-Control": "no-cache",
        "Content-Type": "application/json",
    }

    # Convert the request payload to a JSON string.
    data_json = json.dumps({"url": url})

    # Send the POST request.
    response = requests.post(
        browserless_url,
        headers=headers,
        data=data_json,
    )

    if response.status_code != 200:
        print(f"HTTP request failed with status code {response.status_code}")
        return None

    soup = BeautifulSoup(response.content, "html.parser")
    text = soup.get_text()
    print("CONTENTTTTTT:", text)
    # Bug fix: the scraped text was printed but never returned, so the
    # function always evaluated to None even on success.
    return text
def save_summary(content: str, filename: str):
    """Save a markdown summary into the Second Brain vault.

    Args:
        content: The summary text (markdown format) to write.
        filename: Name of the file to create inside the vault directory.
    """
    obs_dir = (
        "/home/kris/Documents/SmartNotes/SecondBrain/AutoGen/"  # path to Second Brain
    )
    # Bug fix: the `filename` argument was previously ignored and a fixed
    # name was written instead, so every summary overwrote the same file.
    with open(f"{obs_dir}{filename}", "w") as file:
        file.write(content)
autogen.UserProxyAgent( 126 | name="User_proxy", 127 | is_termination_msg=lambda x: x.get("content", "") 128 | and x.get("content", "").rstrip().endswith("TERMINATE"), 129 | human_input_mode="TERMINATE", 130 | function_map={ 131 | "scrape": scrape, 132 | "save_summary": save_summary, 133 | }, 134 | ) 135 | 136 | 137 | # Start the conversation 138 | user_proxy.initiate_chat( 139 | writer, 140 | message="""Summarize the article and highlight key takeaways: https://python.plainenglish.io/python-7-mind-blowing-use-cases-of-the-lambda-function-3bb896f866af""", 141 | ) 142 | 143 | # message="""Summarize the article and highlight key takeaways: https://python.plainenglish.io/python-7-mind-blowing-use-cases-of-the-lambda-function-3bb896f866af""", 144 | # message="""Summarize the article: https://pub.towardsai.net/langchain-101-part-1-building-simple-q-a-app-90d9c4e815f3""", 145 | # message="""Summarize the article: https://towardsdatascience.com/introducing-keyllm-keyword-extraction-with-llms-39924b504813""", 146 | # message="""Summarize the article: https://towardsdatascience.com/exploring-gemba-a-new-llm-based-metric-for-translation-quality-assessment-3a3383de6d1f""", 147 | 148 | # /home/kris/Documents/SmartNotes/SecondBrain/AutoGen 149 | --------------------------------------------------------------------------------