├── .gitignore ├── README.md ├── main.py ├── ppt_data_gen.py ├── ppt_gen.py └── todo.md /.gitignore: -------------------------------------------------------------------------------- 1 | *.pptx 2 | __pycache__ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PPT Generator 2 | A local LLM-assisted PPT generation tool 3 | 4 | ## Why 5 | Writing presentations for course assignments is most often just boilerplate work, especially when even the lecturers don't care about it. 6 | That's why I automated the boilerplate work: just enter a topic and the tool generates a simple presentation, enough to satisfy the base course requirement. 7 | 8 | ## Running Locally 9 | Install [ollama](https://ollama.ai/download) 10 | and have it up and running with the command `ollama serve` (applicable to some systems only) 11 | 12 | Download the required model (this can be changed in this [line](https://github.com/Govind-S-B/ppt_generator/blob/main/ppt_data_gen.py#L24)) 13 | ``` 14 | ollama pull dolphin2.1-mistral 15 | ``` 16 | 17 | 18 | Clone the repo and move into the directory 19 | ``` 20 | git clone https://github.com/Govind-S-B/ppt_generator.git 21 | cd ppt_generator 22 | ``` 23 | Install the required Python dependencies 24 | ``` 25 | pip install -r requirements.txt 26 | ``` 27 | Run the Streamlit app 28 | ``` 29 | streamlit run main.py 30 | ``` 31 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from ppt_data_gen import slide_data_gen 3 | from ppt_gen import ppt_gen 4 | 5 | st.title("PPT Generator") 6 | 7 | topic = st.text_input("Enter a topic:") 8 | 9 | if st.button("Generate"): 10 | data = slide_data_gen(topic) 11 | ppt_file = ppt_gen(data) 12 | 13 | file_name = f"Presentation.pptx" 14 | 15 | 
st.download_button( 16 | label="Download Presentation", 17 | data=ppt_file, 18 | file_name=file_name, 19 | mime="application/vnd.openxmlformats-officedocument.presentationml.presentation", 20 | ) 21 | -------------------------------------------------------------------------------- /ppt_data_gen.py: -------------------------------------------------------------------------------- 1 | import re 2 | from langchain.llms import Ollama 3 | 4 | 5 | def extract_items(input_string): 6 | # Find the text inside the << >> 7 | content = re.search(r'<<(.+?)>>', input_string) 8 | 9 | if content: 10 | content = content.group(1) 11 | else: 12 | return [] 13 | 14 | # Split the content by the | separator and remove whitespace 15 | items = [item.strip() for item in content.split('|')] 16 | 17 | # Remove the quotes from each item 18 | items = [re.sub(r'^"|"$', '', item) for item in items] 19 | 20 | return items 21 | 22 | 23 | def slide_data_gen(topic): 24 | llm = Ollama(model="dolphin2.1-mistral", 25 | temperature="0.4") 26 | 27 | slide_data = [] 28 | 29 | point_count = 5 30 | 31 | slide_data.append(extract_items(llm(f""" 32 | You are a text summarization and formatting specialized model that fetches relevant information 33 | 34 | For the topic "{topic}" suggest a presentation title and a presentation subtitle it should be returned in the format : 35 | << "title" | "subtitle >> 36 | 37 | example : 38 | << "Ethics in Design" | "Integrating Ethics into Design Processes" >> 39 | """))) 40 | 41 | slide_data.append(extract_items(llm(f""" 42 | You are a text summarization and formatting specialized model that fetches relevant information 43 | 44 | For the presentation titled "{slide_data[0][0]}" and with subtitle "{slide_data[0][1]}" for the topic "{topic}" 45 | Write a table of contents containing the title of each slide for a 7 slide presentation 46 | It should be of the format : 47 | << "slide1" | "slide2" | "slide3" | ... 
| >> 48 | 49 | example : 50 | << "Introduction to Design Ethics" | "User-Centered Design" | "Transparency and Honesty" | "Data Privacy and Security" | "Accessibility and Inclusion" | "Social Impact and Sustainability" | "Ethical AI and Automation" | "Collaboration and Professional Ethics" >> 51 | """))) 52 | 53 | for subtopic in slide_data[1]: 54 | 55 | data_to_clean = llm(f""" 56 | You are a content generation specialized model that fetches relevant information and presents it in clear concise manner 57 | 58 | For the presentation titled "{slide_data[0][0]}" and with subtitle "{slide_data[0][1]}" for the topic "{topic}" 59 | Write the contents for a slide with the subtopic {subtopic} 60 | Write {point_count} points. Each point 10 words maximum. 61 | Make the points short, concise and to the point. 62 | """) 63 | 64 | cleaned_data = llm(f""" 65 | You are a text summarization and formatting specialized model that fetches relevant information and formats it into user specified formats 66 | Given below is a text draft for a presentation slide containing {point_count} points , extract the {point_count} sentences and format it as : 67 | 68 | << "point1" | "point2" | "point3" | ... | >> 69 | 70 | example : 71 | << "Foster a collaborative and inclusive work environment." | "Respect intellectual property rights and avoid plagiarism." | "Uphold professional standards and codes of ethics." | "Be open to feedback and continuous learning." 
>> 72 | 73 | -- Beginning of the text -- 74 | {data_to_clean} 75 | -- End of the text -- 76 | """) 77 | 78 | slide_data.append([subtopic] + extract_items(cleaned_data)) 79 | 80 | return slide_data 81 | -------------------------------------------------------------------------------- /ppt_gen.py: -------------------------------------------------------------------------------- 1 | from pptx import Presentation 2 | from pptx.util import Pt 3 | from pptx.dml.color import RGBColor 4 | from pptx.enum.text import PP_ALIGN 5 | from pptx.enum.text import MSO_AUTO_SIZE 6 | 7 | import re 8 | import io 9 | 10 | 11 | def sanitize_string(input_str): 12 | # Remove non-alphanumeric, underscores, hyphens, and periods 13 | sanitized = re.sub(r"[^A-Za-z0-9_.-]", "", input_str) 14 | 15 | # Replace consecutive periods with a single period 16 | sanitized = re.sub(r"\.{2,}", ".", sanitized) 17 | 18 | # Ensure the string starts and ends with an alphanumeric character 19 | sanitized = re.sub(r"^[^A-Za-z0-9]+", "", sanitized) 20 | sanitized = re.sub(r"[^A-Za-z0-9]+$", "", sanitized) 21 | 22 | # Truncate or pad string to meet the 3-63 character length requirement 23 | sanitized = sanitized[:63] if len( 24 | sanitized) > 63 else sanitized.ljust(3, "_") 25 | 26 | return sanitized 27 | 28 | 29 | def ppt_gen(slide_data): 30 | ppt = Presentation() 31 | 32 | # Setting Background 33 | slide_master = ppt.slide_master 34 | slide_master.background.fill.solid() 35 | slide_master.background.fill.fore_color.rgb = RGBColor(0, 0, 0) 36 | 37 | # Title Screen 38 | curr_slide = ppt.slides.add_slide(ppt.slide_layouts[0]) 39 | curr_slide.shapes.title.text = slide_data[0][0] 40 | curr_slide.shapes.title.text_frame.auto_size = MSO_AUTO_SIZE.TEXT_TO_FIT_SHAPE 41 | curr_slide.shapes.title.text_frame.paragraphs[0].runs[0].font.color.rgb = RGBColor( 42 | 255, 255, 255) 43 | curr_slide.shapes.placeholders[1].text = slide_data[0][1] 44 | curr_slide.shapes.placeholders[1].text_frame.auto_size = 
MSO_AUTO_SIZE.TEXT_TO_FIT_SHAPE 45 | curr_slide.shapes.placeholders[1].text_frame.paragraphs[0].runs[0].font.color.rgb = RGBColor( 46 | 255, 255, 255) 47 | 48 | # Overview 49 | curr_slide = ppt.slides.add_slide(ppt.slide_layouts[1]) 50 | curr_slide.shapes.title.text = "Overview" 51 | curr_slide.shapes.title.text_frame.auto_size = MSO_AUTO_SIZE.TEXT_TO_FIT_SHAPE 52 | curr_slide.shapes.title.text_frame.paragraphs[0].runs[0].font.color.rgb = RGBColor( 53 | 255, 255, 255) 54 | for content in slide_data[1]: 55 | tframe = curr_slide.shapes.placeholders[1].text_frame 56 | tframe.auto_size = MSO_AUTO_SIZE.TEXT_TO_FIT_SHAPE 57 | para = tframe.add_paragraph() 58 | para.text = content 59 | para.level = 1 60 | para.font.color.rgb = RGBColor( 61 | 255, 255, 255) 62 | 63 | # Content Slides 64 | for curr_slide_data in slide_data[2:]: 65 | curr_slide = ppt.slides.add_slide(ppt.slide_layouts[1]) 66 | curr_slide.shapes.title.text = curr_slide_data[0] 67 | curr_slide.shapes.title.text_frame.auto_size = MSO_AUTO_SIZE.TEXT_TO_FIT_SHAPE 68 | curr_slide.shapes.title.text_frame.paragraphs[0].font.color.rgb = RGBColor( 69 | 255, 255, 255) 70 | for content in curr_slide_data[1:]: 71 | tframe = curr_slide.shapes.placeholders[1].text_frame 72 | tframe.auto_size = MSO_AUTO_SIZE.TEXT_TO_FIT_SHAPE 73 | para = tframe.add_paragraph() 74 | para.text = content 75 | para.level = 1 76 | para.font.color.rgb = RGBColor( 77 | 255, 255, 255) 78 | 79 | # Thank You Screen 80 | curr_slide = ppt.slides.add_slide(ppt.slide_layouts[2]) 81 | curr_slide.shapes.placeholders[1].text = "Thank You" 82 | 83 | curr_slide.shapes.placeholders[1].text_frame.paragraphs[0].font.color.rgb = RGBColor( 84 | 255, 255, 255) 85 | curr_slide.shapes.placeholders[1].text_frame.paragraphs[0].font.size = Pt( 86 | 96) 87 | curr_slide.shapes.placeholders[1].text_frame.paragraphs[0].alignment = PP_ALIGN.CENTER 88 | 89 | # f"{sanitize_string(slide_data[0][0])}.pptx" 90 | ppt_stream = io.BytesIO() 91 | ppt.save(ppt_stream) 92 | 
ppt_stream.seek(0) 93 | 94 | return ppt_stream 95 | -------------------------------------------------------------------------------- /todo.md: -------------------------------------------------------------------------------- 1 | # To Do 2 | - Figure out a method to set the font color in the master slide itself 3 | - Understand regex better and use it in the text extraction functions 4 | - Add text format validation and a retry loop (again, regex here) 5 | - Optional: at each step of content generation, run a DuckDuckGo (ddg) search for the appropriate content, fetch the top 3 results, and use RAG on them 6 | - Add text autofit for large paragraphs (temporary fix: reduce points to 3) --------------------------------------------------------------------------------