├── .gitattributes ├── .gitignore ├── app.py ├── custom_tools.py ├── email_cleaning.py ├── extract_faq.py └── mbox_to_csv.py /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | .env 3 | __pycache__/custom_tools.cpython-310.pyc 4 | email_pairs.csv 5 | faq.csv 6 | past_email_mbox.csv 7 | Sent.mbox 8 | -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | from dotenv import find_dotenv, load_dotenv 2 | 3 | from langchain.agents import initialize_agent 4 | from langchain.agents import AgentType 5 | from langchain.chat_models import ChatOpenAI 6 | from langchain.prompts import MessagesPlaceholder 7 | from langchain.memory import ConversationSummaryBufferMemory 8 | from langchain.chains.summarize import load_summarize_chain 9 | from langchain.schema import SystemMessage 10 | from custom_tools import CreateEmailDraftTool, GenerateEmailResponseTool, ReplyEmailTool, EscalateTool, ProspectResearchTool, CategoriseEmailTool 11 | 12 | load_dotenv() 13 | llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613") 14 | 15 | system_message = SystemMessage( 16 | content=""" 17 | You are an email inbox assistant of an AI youtube channel called "AI Jason", 18 | who is creating AI educational content, 19 | Your goal is to handle all the incoming emails by categorising them based on 20 | guideline and decide on next steps 21 | """ 22 | ) 23 | 24 | tools = [ 25 | CategoriseEmailTool(), 26 | ProspectResearchTool(), 27 | EscalateTool(), 28 | ReplyEmailTool(), 29 | CreateEmailDraftTool(), 30 | GenerateEmailResponseTool(), 31 | ] 32 | 33 | agent_kwargs = { 34 | "extra_prompt_messages": [MessagesPlaceholder(variable_name="memory")], 35 | "system_message": system_message, 36 | } 37 | memory = ConversationSummaryBufferMemory( 38 | memory_key="memory", return_messages=True, llm=llm, max_token_limit=1000) 39 | 40 | agent = initialize_agent( 41 | tools, 42 | llm, 43 | agent=AgentType.OPENAI_FUNCTIONS, 44 | verbose=True, 45 | agent_kwargs=agent_kwargs, 46 | memory=memory, 47 | ) 48 | 49 | 50 | test_email = """ 51 | xxxxxxx 52 | """ 53 | 54 | agent({"input": test_email}) 55 | -------------------------------------------------------------------------------- /custom_tools.py: -------------------------------------------------------------------------------- 1 | import os 2 | from dotenv import load_dotenv, find_dotenv 3 | import openai 4 | 5 | from langchain import PromptTemplate 6 | from langchain.agents import initialize_agent, Tool 7 | from langchain.agents import AgentType 8 | from langchain.chat_models import ChatOpenAI 9 | from langchain.prompts import MessagesPlaceholder 10 | from langchain.memory import ConversationSummaryBufferMemory 11 | from langchain.text_splitter import RecursiveCharacterTextSplitter 12 | from langchain.chains.summarize import load_summarize_chain 13 | from langchain.tools import BaseTool 14 | from pydantic import BaseModel, Field 15 | from typing import Type 16 | from bs4 import BeautifulSoup 17 | import requests 18 | import json 19 | from langchain.schema import SystemMessage 20 | 21 | load_dotenv(find_dotenv()) 22 | openai.api_key = os.environ.get("OPENAI_API_KEY") 23 | llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-16k-0613") 24 | 25 | 26 | # CATEGORISE EMAIL 27 | def check_consulting_email(lates_reply: str): 28 | prompt = f""" 29 | EMAIL: {lates_reply} 30 | --- 31 | 32 | Above is an email about Job offer / consulting; Your goal is identify if all information above is mentioned: 33 | 1. What's the problem the prospect is trying to solve? 34 | 2. Their budget 35 | 36 | If all info above is collected, return YES, otherwise, return NO; (Return ONLY YES or NO) 37 | 38 | ANSWER: 39 | """ 40 | 41 | all_needs_collected_result = openai.ChatCompletion.create( 42 | model="gpt-4", 43 | messages=[ 44 | {"role": "user", "content": prompt} 45 | ] 46 | ) 47 | 48 | all_needs_collected = all_needs_collected_result["choices"][0]["message"]["content"] 49 | 50 | return all_needs_collected 51 | 52 | 53 | def categorise_email(lates_reply: str): 54 | categorise_prompt = f""" 55 | EMAIL: {lates_reply} 56 | --- 57 | 58 | Your goal is to categorise the email based on categories below: 59 | 60 | 1. COLLABORATION/SPONSORSHIP: These are emails where companies or individuals are reaching out to propose a collaboration or sponsorship opportunity with AI Jason. They often include details about their product or service and how they envision the partnership. 61 | 62 | 2. JOB_OFFER/CONSULTING: These emails involve individuals or companies reaching out to AI Jason with a specific job or project they want him to work on. This could range from developing an AI application to leading a specific activity. 63 | 64 | 3. QUESTIONS: These emails involve individuals reaching out to AI Jason with specific questions or inquiries. This could be about his videos, his knowledge on a specific topic, or his thoughts on a specific AI tool or technology. 65 | 66 | 4. NON_REPLY: These are auto emails that don't need any response or involve companies or individuals reaching out to AI Jason to offer their services. This could be a marketing agency offering to help him find sponsorship opportunities or a company offering a specific tool or service they think he might find useful. 67 | 68 | 5. OTHER: These are emails that don't fit into any of the above categories. 69 | 70 | CATEGORY (Return ONLY the category name in capital): 71 | """ 72 | 73 | category_result = openai.ChatCompletion.create( 74 | model="gpt-4", 75 | messages=[ 76 | {"role": "user", "content": categorise_prompt} 77 | ] 78 | ) 79 | 80 | category = category_result["choices"][0]["message"]["content"] 81 | 82 | if category == "JOB_OFFER/CONSULTING": 83 | all_needs_collected = check_consulting_email(lates_reply) 84 | if all_needs_collected == "YES": 85 | return { 86 | "Step 1": """Forward the email to jason.zhou.design@gmail.com, with summary of 1.What's the problem the prospect is trying to solve? 2.Their budget""" 87 | } 88 | else: 89 | return { 90 | "Step 1": "Generate email response to the prospect to collect further info based on guidelines", 91 | "Step 2": "Send generated email response to prospect", 92 | } 93 | else: 94 | if category == "COLLABORATION/SPONSORSHIP": 95 | return { 96 | "Step 1": "Research about the prospect & company", 97 | "Step 2": "Forward the email to jason.zhou.design@gmail.com, with the research results included" 98 | } 99 | else: 100 | if category == "NON_REPLY": 101 | return f"This email has already been taken care of or replied before, nothing needs to be done now" 102 | else: 103 | return { 104 | "Step 1": "Generate email response based on guidelines", 105 | "Step 2": "Create email draft with the generated response" 106 | } 107 | 108 | 109 | class CategoriseEmailInput(BaseModel): 110 | lates_reply: str = Field(description="Latest reply from the prospect ") 111 | 112 | 113 | class CategoriseEmailTool(BaseTool): 114 | name = "categorise_email" 115 | description = "use this to categorise email to decide what to do next" 116 | args_schema: Type[BaseModel] = CategoriseEmailInput 117 | 118 | def _run(self, lates_reply: str): 119 | return categorise_email(lates_reply) 120 | 121 | def _arun(self, url: str): 122 | raise NotImplementedError( 123 | "get_stock_performance does not support async") 124 | 125 | 126 | # WRITE EMAIL 127 | def generate_email_response(email_thread: str, category: str): 128 | # URL endpoint 129 | url = "https://api-f1db6c.stack.tryrelevance.com/latest/studios/6af484b0-a8bf-4545-91b8-75d46ac8f354/trigger_limited" 130 | 131 | # Headers 132 | headers = { 133 | "Content-Type": "application/json" 134 | } 135 | 136 | # Payload (data) 137 | data = { 138 | "params": { 139 | "client_email": email_thread, 140 | "goal": "write email response" if category != "CONSULTING FOLLOW UP" else "for each consulting email, we need to collect 1. Their use case & problem they are trying to solve 2. Their budget; Try to collect those info from them", 141 | }, 142 | "project": "f86edbc1-fcb6-41f9-b9b6-be14a6f06412" 143 | } 144 | 145 | # Send POST request 146 | response = requests.post(url, headers=headers, json=data) 147 | 148 | return response.text 149 | 150 | 151 | class GenerateEmailResponseInput(BaseModel): 152 | """Inputs for scrape_website""" 153 | email_thread: str = Field(description="The original full email thread") 154 | category: str = Field( 155 | description='category of email, can ONLY be "CONSULTING FOLLOW UP" or "OTHER" ') 156 | 157 | 158 | class GenerateEmailResponseTool(BaseTool): 159 | name = "generate_email_response" 160 | description = "use this to generate the email response based on specific guidelines, voice & tone & knowledge for AI Jason" 161 | args_schema: Type[BaseModel] = GenerateEmailResponseInput 162 | 163 | def _run(self, email_thread: str, category: str): 164 | return generate_email_response(email_thread, category) 165 | 166 | def _arun(self, url: str): 167 | raise NotImplementedError("failed to escalate") 168 | 169 | 170 | # RESEARCH AGENT 171 | 172 | def summary(objective, content): 173 | text_splitter = RecursiveCharacterTextSplitter( 174 | separators=["\n\n", "\n"], chunk_size=10000, chunk_overlap=500) 175 | docs = text_splitter.create_documents([content]) 176 | map_prompt = """ 177 | Write a summary of the following text for {objective}: 178 | "{text}" 179 | SUMMARY: 180 | """ 181 | map_prompt_template = PromptTemplate( 182 | template=map_prompt, input_variables=["text", "objective"]) 183 | 184 | summary_chain = load_summarize_chain( 185 | llm=llm, 186 | chain_type='map_reduce', 187 | map_prompt=map_prompt_template, 188 | combine_prompt=map_prompt_template, 189 | verbose=False 190 | ) 191 | 192 | output = summary_chain.run(input_documents=docs, objective=objective) 193 | 194 | return output 195 | 196 | 197 | def scrape_website(objective: str, url: str): 198 | # scrape website, and also will summarize the content based on objective if the content is too large 199 | # objective is the original objective & task that user give to the agent, url is the url of the website to be scraped 200 | 201 | # Define the headers for the request 202 | headers = { 203 | 'Cache-Control': 'no-cache', 204 | 'Content-Type': 'application/json', 205 | } 206 | 207 | # Define the data to be sent in the request 208 | data = { 209 | "url": url 210 | } 211 | 212 | # Convert Python object to JSON string 213 | data_json = json.dumps(data) 214 | 215 | # Send the POST request 216 | response = requests.post( 217 | "https://chrome.browserless.io/content?token=xxxxxxxxxxxxxxxxxxxxxxxxxxx", headers=headers, data=data_json) 218 | 219 | # Check the response status code 220 | if response.status_code == 200: 221 | soup = BeautifulSoup(response.content, "html.parser") 222 | text = soup.get_text() 223 | if len(text) > 10000: 224 | output = summary(objective, text) 225 | return output 226 | else: 227 | return text 228 | else: 229 | return f"HTTP request failed with status code {response.status_code}" 230 | 231 | 232 | class ScrapeWebsiteInput(BaseModel): 233 | """Inputs for scrape_website""" 234 | objective: str = Field( 235 | description="The objective & task that users give to the agent") 236 | url: str = Field(description="The url of the website to be scraped") 237 | 238 | 239 | class ScrapeWebsiteTool(BaseTool): 240 | name = "scrape_website" 241 | description = "useful when you need to get data from a website url, passing both url and objective to the function; DO NOT make up any url, the url should only be from the search results" 242 | args_schema: Type[BaseModel] = ScrapeWebsiteInput 243 | 244 | def _run(self, objective: str, url: str): 245 | return scrape_website(objective, url) 246 | 247 | def _arun(self, url: str): 248 | raise NotImplementedError( 249 | "get_stock_performance does not support async") 250 | 251 | 252 | def search(query): 253 | url = "https://google.serper.dev/search" 254 | 255 | payload = json.dumps({ 256 | "q": query 257 | }) 258 | 259 | headers = { 260 | 'X-API-KEY': 'xxxxxxxxxxxxxxxxxxxxx', 261 | 'Content-Type': 'application/json' 262 | } 263 | 264 | response = requests.request("POST", url, headers=headers, data=payload) 265 | 266 | return response.text 267 | 268 | 269 | def prospect_research(email_or_name: str, company: str): 270 | tools = [ 271 | Tool( 272 | name="Search", 273 | func=search, 274 | description="useful for when you need to answer questions about current events, data. You should ask targeted questions" 275 | ), 276 | ScrapeWebsiteTool(), 277 | ] 278 | 279 | system_message = SystemMessage( 280 | content="""You are a world class researcher, who can do detailed research on any topic and produce facts based results; 281 | you do not make things up, you will try as hard as possible to gather facts & data to back up the research 282 | 283 | Please make sure you complete the objective above with the following rules: 284 | 1/ You should do enough research to gather as much information as possible about the objective 285 | 2/ If there are url of relevant links & articles, you will scrape it to gather more information 286 | 3/ After scraping & search, you should think "is there any new things i should search & scraping based on the data I collected to increase research quality?" If answer is yes, continue; But don't do this more than 3 iteratins 287 | 4/ You should not make things up, you should only write facts & data that you have gathered 288 | 5/ In the final output, You should include all reference data & links to back up your research; You should include all reference data & links to back up your research 289 | 6/ In the final output, You should include all reference data & links to back up your research; You should include all reference data & links to back up your research""" 290 | ) 291 | 292 | agent_kwargs = { 293 | "extra_prompt_messages": [MessagesPlaceholder(variable_name="memory")], 294 | "system_message": system_message, 295 | } 296 | memory = ConversationSummaryBufferMemory( 297 | memory_key="memory", return_messages=True, llm=llm, max_token_limit=1000) 298 | 299 | agent = initialize_agent( 300 | tools, 301 | llm, 302 | agent=AgentType.OPENAI_FUNCTIONS, 303 | verbose=False, 304 | agent_kwargs=agent_kwargs, 305 | memory=memory, 306 | ) 307 | 308 | message = f"Research about {company} and {email_or_name}; What does the company do, and who the person is" 309 | 310 | result = agent({"input": message}) 311 | 312 | return result 313 | 314 | 315 | class ProspectResearchInput(BaseModel): 316 | """Inputs for scrape_website""" 317 | email_or_name: str = Field( 318 | description="The original email address or name of prospect") 319 | company: str = Field(description="The company name of prospect") 320 | 321 | 322 | class ProspectResearchTool(BaseTool): 323 | name = "prospect_research" 324 | description = "useful when you need to research about a prospect, passing both email and company to the function, return the summary of its company as well as the prospect" 325 | args_schema: Type[BaseModel] = ProspectResearchInput 326 | 327 | def _run(self, email_or_name: str, company: str): 328 | return prospect_research(email_or_name, company) 329 | 330 | def _arun(self, url: str): 331 | raise NotImplementedError("failed to escalate") 332 | 333 | 334 | # ESCALATE 335 | 336 | def escalate(original_email_address: str, message: str, additional_context: str): 337 | # URL to send the POST request to 338 | url = 'https://hooks.zapier.com/hooks/catch/15616669/38qwq19/' 339 | 340 | # Data to send in the POST request 341 | data = { 342 | "prospect email": original_email_address, 343 | "prospect message": message, 344 | "additional context": additional_context 345 | } 346 | 347 | # Send the POST request 348 | response = requests.post(url, data=data) 349 | 350 | # Check the response 351 | if response.status_code == 200: 352 | return ('This email has been escalated to Jason, he will take care of it from here, nothing needs to be done now') 353 | else: 354 | return ('Failed to send POST request:', response.status_code) 355 | 356 | 357 | class EscalateInput(BaseModel): 358 | """Inputs for scrape_website""" 359 | message: str = Field( 360 | description="The original email thread & message that was received, cc the original copy for escalation") 361 | original_email_address: str = Field( 362 | description="The email address that sent the message/email") 363 | additional_context: str = Field( 364 | description="additional context about the prospect, can be the company/prospct background research OR the consulting request details like use case, budget, etc.") 365 | 366 | 367 | class EscalateTool(BaseTool): 368 | name = "escalate_to_jason" 369 | description = "useful when you need to escalate the case to jason or others, passing both message and original_email_address to the function" 370 | args_schema: Type[BaseModel] = EscalateInput 371 | 372 | def _run(self, original_email_address: str, message: str, additional_context: str): 373 | return escalate(original_email_address, message, additional_context) 374 | 375 | def _arun(self, url: str): 376 | raise NotImplementedError("failed to escalate") 377 | 378 | 379 | # REPLY EMAIL 380 | def reply_email(message: str, email_address: str, subject: str): 381 | return f"An email has been sent to {email_address}" 382 | 383 | # URL to send the POST request to 384 | url = 'https://hooks.zapier.com/hooks/catch/15616669/38qaaau/' 385 | 386 | # Data to send in the POST request 387 | data = { 388 | "Email": email_address, 389 | "Subject": subject, 390 | "Reply": message 391 | } 392 | 393 | # Send the POST request 394 | response = requests.post(url, data=data) 395 | 396 | # Check the response 397 | if response.status_code == 200: 398 | return ('Email reply has been created successfully') 399 | else: 400 | return ('Failed to send POST request:', response.status_code) 401 | 402 | 403 | class ReplyEmailInput(BaseModel): 404 | """Inputs for scrape_website""" 405 | message: str = Field( 406 | description="The generated response message to be sent to the email address") 407 | email_address: str = Field( 408 | description="Destination email address to send email to") 409 | subject: str = Field(description="subject of the email") 410 | 411 | 412 | class ReplyEmailTool(BaseTool): 413 | name = "reply_email" 414 | description = "use this to send emails" 415 | args_schema: Type[BaseModel] = ReplyEmailInput 416 | 417 | def _run(self, message: str, email_address: str, subject: str): 418 | return reply_email(message, email_address, subject) 419 | 420 | def _arun(self, url: str): 421 | raise NotImplementedError("failed to escalate") 422 | 423 | 424 | # CREATE EMAIL DRAFT 425 | def create_email_draft(prospect_email_address: str, subject: str, generated_reply: str): 426 | # URL to send the POST request to 427 | url = 'https://hooks.zapier.com/hooks/catch/15616669/38ikw12/' 428 | 429 | # Data to send in the POST request 430 | data = { 431 | "email": prospect_email_address, 432 | "subject": subject, 433 | "reply": generated_reply 434 | } 435 | 436 | # Send the POST request 437 | response = requests.post(url, data=data) 438 | 439 | # Check the response 440 | if response.status_code == 200: 441 | return ('Email draft has been created successfully') 442 | else: 443 | return ('Failed to send POST request:', response.status_code) 444 | 445 | 446 | class CreateEmailDraftInput(BaseModel): 447 | """Inputs for scrape_website""" 448 | prospect_email_address: str = Field( 449 | description="The prospect's email address") 450 | subject: str = Field(description="The original email subject") 451 | generated_reply: str = Field( 452 | description="Generated email reply to prospect") 453 | 454 | 455 | class CreateEmailDraftTool(BaseTool): 456 | name = "create_email_draft" 457 | description = "use this to create email draft for jason to review & send" 458 | args_schema: Type[BaseModel] = CreateEmailDraftInput 459 | 460 | def _run(self, prospect_email_address: str, subject: str, generated_reply: str): 461 | return create_email_draft(prospect_email_address, subject, generated_reply) 462 | 463 | def _arun(self, url: str): 464 | raise NotImplementedError("failed to escalate") 465 | -------------------------------------------------------------------------------- /email_cleaning.py: -------------------------------------------------------------------------------- 1 | import os 2 | from dotenv import find_dotenv, load_dotenv 3 | import openai 4 | import json, csv 5 | 6 | load_dotenv(find_dotenv()) 7 | openai.api_key = os.environ.get("OPENAI_API_KEY") 8 | 9 | def parse_email(email_thread): 10 | 11 | system_prompt = """ 12 | You are an expert of convert raw email thread into original message / reply pairs. 13 | You are given a raw email thread that Jason reply to others, your goal is to convert it into original message / reply pairs. 14 | - orignal_message: the last message sent to Jason, if it is a long email thread, only take the last message 15 | - jason_reply: Jason's reply to the original message 16 | 17 | if there is only one message in the thread, that should be jason_reply 18 | 19 | The exported format should look something like 20 | { 21 | "original_message": "xxxx", 22 | "jason_reply": "xxxx" 23 | } 24 | """ 25 | 26 | response = openai.ChatCompletion.create( 27 | model="gpt-4", 28 | messages=[ 29 | {"role": "system", "content": system_prompt}, 30 | {"role": "user", "content": email_thread} 31 | ] 32 | ) 33 | 34 | return response["choices"][0]["message"]["content"] 35 | 36 | def process_csv(input_csv_path, output_csv_path): 37 | with open(input_csv_path, newline='', encoding='utf-8') as csvfile: 38 | csv_reader = csv.DictReader(csvfile) 39 | processed_data = [] 40 | 41 | for row in csv_reader: 42 | text = row['Body'] # Get the text from the 'body' column 43 | json_string = parse_email(text) 44 | print(json_string) 45 | json_data = json.loads(json_string) # Convert JSON string to dictionary 46 | original_message = json_data.get('original_message', '') 47 | jason_reply = json_data.get('jason_reply', '') 48 | # Append original row data and new columns to processed_data 49 | processed_data.append([original_message, jason_reply]) 50 | 51 | # Write processed data to a new CSV file 52 | with open(output_csv_path, mode='w', newline='', encoding='utf-8') as csvfile: 53 | csv_writer = csv.writer(csvfile) 54 | # Write header 55 | csv_writer.writerow(['original_message', 'jason_reply']) 56 | # Write data rows 57 | csv_writer.writerows(processed_data) 58 | 59 | # Paths to your input and output CSV files 60 | input_csv_path = 'past_email_final_mboxt.csv' 61 | output_csv_path = 'email_pairs.csv' 62 | 63 | # Call the function to process the CSV file 64 | process_csv(input_csv_path, output_csv_path) 65 | -------------------------------------------------------------------------------- /extract_faq.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import json 3 | from dotenv import find_dotenv, load_dotenv 4 | from langchain.prompts import PromptTemplate 5 | from langchain.chat_models import ChatOpenAI 6 | from langchain.text_splitter import RecursiveCharacterTextSplitter 7 | from langchain.chains.summarize import load_summarize_chain 8 | 9 | load_dotenv() 10 | llm = ChatOpenAI(temperature=0, model_name="gpt-4") 11 | 12 | def load_csv(file_path): 13 | # Create a list to hold dictionaries 14 | data_list = [] 15 | 16 | # Open the CSV file and read its content 17 | with open(file_path, 'r') as csv_file: 18 | csv_reader = csv.DictReader(csv_file) 19 | 20 | # For each row, append it as a dictionary to the list 21 | for row in csv_reader: 22 | data_list.append(row) 23 | 24 | return data_list 25 | 26 | def extract_faq(text_data): 27 | text_splitter = RecursiveCharacterTextSplitter( 28 | chunk_size=3000, 29 | chunk_overlap=20, 30 | length_function = len, 31 | is_separator_regex=False) 32 | 33 | texts = text_splitter.split_text(text_data) 34 | docs = text_splitter.create_documents(texts) 35 | 36 | 37 | map_prompt = """ 38 | PAST EMAILS: 39 | {text} 40 | ---- 41 | 42 | You are a smart AI assistant, above is some past emails from AI Jason (an AI youtuber), 43 | your goal is to learn & extract common FAQ about AI Jason 44 | (include both question & answer, return results in JSON): 45 | """ 46 | map_prompt_template = PromptTemplate(template=map_prompt, input_variables=["text"]) 47 | 48 | combine_prompt = """ 49 | The following is set of FAQ about AI Jason (an AI youtuber): 50 | {text} 51 | Take these and distill it into a final, consolidated array of faq, 52 | include both question & answer (in JSON format). 53 | 54 | array of FAQ: 55 | """ 56 | combine_prompt_template = PromptTemplate(template=combine_prompt, input_variables=["text"]) 57 | 58 | summary_chain = load_summarize_chain(llm=llm, 59 | chain_type='map_reduce', 60 | map_prompt=map_prompt_template, 61 | combine_prompt=combine_prompt_template, 62 | verbose=True 63 | ) 64 | 65 | output = summary_chain.run(docs) 66 | faqs = json.loads(output) 67 | 68 | return faqs 69 | 70 | def save_json_to_csv(data, file_name): 71 | with open(file_name, mode='w', newline='', encoding='utf-8') as file: 72 | # Get the keys (column names) from the first dictionary in the list 73 | fieldnames = data[0].keys() 74 | 75 | # Create a CSV dict writer object 76 | writer = csv.DictWriter(file, fieldnames=fieldnames) 77 | 78 | # Write the header row 79 | writer.writeheader() 80 | 81 | # Write the data rows 82 | for entry in data: 83 | writer.writerow(entry) 84 | 85 | 86 | # Print or save the JSON data 87 | past_emails = load_csv("email_pairs.csv") 88 | 89 | # Extracting Jason's replies 90 | jasons_replies = [entry["jason_reply"] for entry in past_emails] 91 | jasons_replies_string = json.dumps(jasons_replies) 92 | 93 | faqs = extract_faq(jasons_replies_string) 94 | 95 | save_json_to_csv(faqs, "faq.csv") 96 | 97 | -------------------------------------------------------------------------------- /mbox_to_csv.py: -------------------------------------------------------------------------------- 1 | import mailbox 2 | import csv 3 | from email import policy 4 | from email.parser import BytesParser 5 | 6 | def get_body(message): 7 | if message.is_multipart(): 8 | for part in message.walk(): 9 | if part.is_multipart(): 10 | for subpart in part.walk(): 11 | if subpart.get_content_type() == 'text/plain': 12 | return subpart.get_payload(decode=True) 13 | elif part.get_content_type() == 'text/plain': 14 | return part.get_payload(decode=True) 15 | else: 16 | return message.get_payload(decode=True) 17 | 18 | def mbox_to_csv(mbox_file_path, csv_file_path): 19 | mbox = mailbox.mbox(mbox_file_path) 20 | 21 | with open(csv_file_path, mode='w', newline='', encoding='utf-8') as file: 22 | writer = csv.writer(file) 23 | writer.writerow(['Subject', 'From', 'Date', 'To', 'Message-ID', 'Body']) 24 | 25 | for message in mbox: 26 | body = get_body(message) # Get the message body using the new get_body function 27 | if body: 28 | body = body.decode('utf-8', errors='replace').replace('\n', ' ').replace('\r', '') 29 | else: 30 | body = '' 31 | writer.writerow([ 32 | message['subject'], 33 | message['from'], 34 | message['date'], 35 | message['to'], 36 | message['message-id'], 37 | body 38 | ]) 39 | 40 | # Usage 41 | mbox_file_path = 'Sent.mbox' # replace with the path to your MBOX file 42 | csv_file_path = 'past_email_mbox.csv' # replace with the desired path for the output CSV file 43 | mbox_to_csv(mbox_file_path, csv_file_path) 44 | --------------------------------------------------------------------------------