├── .gitignore
├── README.md
├── main.py
├── ppt_data_gen.py
├── ppt_gen.py
└── todo.md


/.gitignore:
--------------------------------------------------------------------------------
1 | *.pptx
2 | __pycache__


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # PPT Generator
 2 | A local, LLM-assisted PPT generation tool.
 3 | 
 4 | ## Why  
 5 | Writing presentations for course assignments is mostly boilerplate work, especially when even the lecturers don't care about it.
 6 | That's why I automated the boilerplate work: just enter a topic and the tool generates a simple presentation, enough to satisfy the base course requirement.
 7 | 
 8 | ## Running Locally
 9 | Install [ollama](https://ollama.ai/download)
10 | and have it up and running with the command `ollama serve` (applicable to some systems only).  
11 | 
12 | Download the required model (this can be changed in this [line](https://github.com/Govind-S-B/ppt_generator/blob/main/ppt_data_gen.py#L24)):
13 | ```
14 | ollama pull dolphin2.1-mistral
15 | ```
16 | 
17 | 
18 | Clone the repo and move into the directory:
19 | ```
20 | git clone https://github.com/Govind-S-B/ppt_generator.git
21 | cd ppt_generator
22 | ```
23 | Install the required Python dependencies:
24 | ```
25 | pip install -r requirements.txt
26 | ```
27 | Run the Streamlit app:
28 | ```
29 | streamlit run main.py
30 | ```
31 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | import streamlit as st
 2 | from ppt_data_gen import slide_data_gen
 3 | from ppt_gen import ppt_gen
 4 | 
 5 | st.title("PPT Generator")
 6 | 
 7 | topic = st.text_input("Enter a topic:")
 8 | 
 9 | if st.button("Generate"):
10 |     data = slide_data_gen(topic)
11 |     ppt_file = ppt_gen(data)
12 | 
13 |     file_name = f"Presentation.pptx"
14 | 
15 |     st.download_button(
16 |         label="Download Presentation",
17 |         data=ppt_file,
18 |         file_name=file_name,
19 |         mime="application/vnd.openxmlformats-officedocument.presentationml.presentation",
20 |     )
21 | 


--------------------------------------------------------------------------------
/ppt_data_gen.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | from langchain.llms import Ollama
 3 | 
 4 | 
 5 | def extract_items(input_string):
 6 |     # Find the text inside the << >>
 7 |     content = re.search(r'<<(.+?)>>', input_string)
 8 | 
 9 |     if content:
10 |         content = content.group(1)
11 |     else:
12 |         return []
13 | 
14 |     # Split the content by the | separator and remove whitespace
15 |     items = [item.strip() for item in content.split('|')]
16 | 
17 |     # Remove the quotes from each item
18 |     items = [re.sub(r'^"|"$', '', item) for item in items]
19 | 
20 |     return items
21 | 
22 | 
def _ask_for_items(llm, prompt, min_items=1, attempts=3):
    """Query *llm* until its reply parses into at least *min_items* items.

    Returns the non-empty items parsed from the last reply; this list may
    hold fewer than *min_items* entries if every attempt failed.
    """
    items = []
    for _ in range(attempts):
        items = [item for item in extract_items(llm(prompt)) if item]
        if len(items) >= min_items:
            break
    return items


def slide_data_gen(topic):
    """Generate the text content of a presentation about *topic*.

    Args:
        topic: Subject of the presentation, interpolated into every prompt.

    Returns:
        A list of lists of strings:
        ``[0]`` holds the presentation title followed by the subtitle,
        ``[1]`` holds the slide titles (table of contents), and every
        following entry is ``[slide title, point, point, ...]``.

    Raises:
        ValueError: If no title/subtitle pair could be parsed from the model
            replies after several attempts.
    """
    llm = Ollama(model="dolphin2.1-mistral",
                 temperature=0.4)

    point_count = 5

    heading = _ask_for_items(llm, f"""
    You are a text summarization and formatting specialized model that fetches relevant information

    For the topic "{topic}" suggest a presentation title and a presentation subtitle it should be returned in the format :
    << "title" | "subtitle" >>

    example :
    << "Ethics in Design" | "Integrating Ethics into Design Processes" >>
    """, min_items=2)

    # Everything below interpolates the title and subtitle, so fail with a
    # clear message instead of an IndexError when they are missing.
    if len(heading) < 2:
        raise ValueError(
            f"could not extract a title and subtitle for topic {topic!r} "
            "from the model output")
    title, subtitle = heading[0], heading[1]

    slide_titles = _ask_for_items(llm, f"""
    You are a text summarization and formatting specialized model that fetches relevant information
            
    For the presentation titled "{title}" and with subtitle "{subtitle}" for the topic "{topic}"
    Write a table of contents containing the title of each slide for a 7 slide presentation
    It should be of the format :
    << "slide1" | "slide2" | "slide3" | ... | >>
            
    example :
    << "Introduction to Design Ethics" | "User-Centered Design" | "Transparency and Honesty" | "Data Privacy and Security" | "Accessibility and Inclusion" | "Social Impact and Sustainability" | "Ethical AI and Automation" | "Collaboration and Professional Ethics" >>          
    """)

    slide_data = [heading, slide_titles]

    for subtopic in slide_titles:

        # First pass: free-form draft of the slide content.
        data_to_clean = llm(f"""
        You are a content generation specialized model that fetches relevant information and presents it in clear concise manner
                
        For the presentation titled "{title}" and with subtitle "{subtitle}" for the topic "{topic}"
        Write the contents for a slide with the subtopic {subtopic}
        Write {point_count} points. Each point 10 words maximum.
        Make the points short, concise and to the point.
        """)

        # Second pass: have the model reformat the draft into the
        # << ... | ... >> form that extract_items can parse.
        points = _ask_for_items(llm, f"""
        You are a text summarization and formatting specialized model that fetches relevant information and formats it into user specified formats
        Given below is a text draft for a presentation slide containing {point_count} points , extract the {point_count} sentences and format it as :
                    
        << "point1" | "point2" | "point3" | ... | >>
                    
        example :
        << "Foster a collaborative and inclusive work environment." | "Respect intellectual property rights and avoid plagiarism." | "Uphold professional standards and codes of ethics." | "Be open to feedback and continuous learning." >>

        -- Beginning of the text --
        {data_to_clean}
        -- End of the text --         
        """)

        slide_data.append([subtopic] + points)

    return slide_data
81 | 


--------------------------------------------------------------------------------
/ppt_gen.py:
--------------------------------------------------------------------------------
 1 | from pptx import Presentation
 2 | from pptx.util import Pt
 3 | from pptx.dml.color import RGBColor
 4 | from pptx.enum.text import PP_ALIGN
 5 | from pptx.enum.text import MSO_AUTO_SIZE
 6 | 
 7 | import re
 8 | import io
 9 | 
10 | 
def sanitize_string(input_str):
    """Reduce *input_str* to a name of 3 to 63 safe characters.

    Only ASCII letters, digits, underscores, hyphens and periods are kept,
    runs of periods collapse to one, and the result starts and ends with an
    alphanumeric character unless it had to be padded.

    Args:
        input_str: Arbitrary text, e.g. a presentation title.

    Returns:
        The sanitized string. Results shorter than 3 characters are
        right-padded with underscores, so ``""`` becomes ``"___"``.
    """
    # Drop everything except letters, digits, underscores, periods, hyphens.
    sanitized = re.sub(r"[^A-Za-z0-9_.-]", "", input_str)

    # Replace consecutive periods with a single period.
    sanitized = re.sub(r"\.{2,}", ".", sanitized)

    # The string must start with an alphanumeric character.
    sanitized = re.sub(r"^[^A-Za-z0-9]+", "", sanitized)

    # Truncate BEFORE trimming the tail: cutting at 63 characters afterwards
    # could leave the result ending in a separator again.
    sanitized = re.sub(r"[^A-Za-z0-9]+$", "", sanitized[:63])

    # Pad up to the 3 character minimum (no-op for longer strings).
    return sanitized.ljust(3, "_")
27 | 
28 | 
def _set_text_color(text_frame, color):
    """Apply *color* to every paragraph and every run of *text_frame*."""
    for paragraph in text_frame.paragraphs:
        paragraph.font.color.rgb = color
        # Iterating the runs (instead of indexing runs[0]) is safe for
        # paragraphs that hold no text.
        for run in paragraph.runs:
            run.font.color.rgb = color


def _fill_placeholder(placeholder, text, color):
    """Write *text* into *placeholder*, request autofit and colour it."""
    placeholder.text = text
    placeholder.text_frame.auto_size = MSO_AUTO_SIZE.TEXT_TO_FIT_SHAPE
    _set_text_color(placeholder.text_frame, color)


def _add_bullet_slide(ppt, title, bullets, color):
    """Append a slide using layout 1 with *title* and one paragraph per bullet."""
    slide = ppt.slides.add_slide(ppt.slide_layouts[1])
    _fill_placeholder(slide.shapes.title, title, color)

    body = slide.shapes.placeholders[1].text_frame
    body.auto_size = MSO_AUTO_SIZE.TEXT_TO_FIT_SHAPE
    for bullet in bullets:
        para = body.add_paragraph()
        para.text = bullet
        para.level = 1
        para.font.color.rgb = color
    return slide


def ppt_gen(slide_data):
    """Render *slide_data* into a PowerPoint file held in memory.

    Args:
        slide_data: List of lists of strings. ``slide_data[0]`` starts with
            the presentation title and subtitle, ``slide_data[1]`` lists the
            slide titles shown on the "Overview" slide, and each following
            entry is ``[slide title, point, point, ...]``.

    Returns:
        An ``io.BytesIO`` positioned at offset 0 that contains the saved
        presentation.

    Raises:
        ValueError: If *slide_data* lacks the title/subtitle entry or the
            overview entry.
    """
    if len(slide_data) < 2 or len(slide_data[0]) < 2:
        raise ValueError(
            "slide_data must start with [title, subtitle] followed by the "
            "list of slide titles")

    white = RGBColor(255, 255, 255)
    ppt = Presentation()

    # Setting Background
    slide_master = ppt.slide_master
    slide_master.background.fill.solid()
    slide_master.background.fill.fore_color.rgb = RGBColor(0, 0, 0)

    # Title Screen
    title_slide = ppt.slides.add_slide(ppt.slide_layouts[0])
    _fill_placeholder(title_slide.shapes.title, slide_data[0][0], white)
    _fill_placeholder(title_slide.shapes.placeholders[1], slide_data[0][1],
                      white)

    # Overview
    _add_bullet_slide(ppt, "Overview", slide_data[1], white)

    # Content Slides
    for curr_slide_data in slide_data[2:]:
        _add_bullet_slide(ppt, curr_slide_data[0], curr_slide_data[1:], white)

    # Thank You Screen
    closing_slide = ppt.slides.add_slide(ppt.slide_layouts[2])
    closing_text = closing_slide.shapes.placeholders[1]
    closing_text.text = "Thank You"

    closing_para = closing_text.text_frame.paragraphs[0]
    closing_para.font.color.rgb = white
    closing_para.font.size = Pt(96)
    closing_para.alignment = PP_ALIGN.CENTER

    # The presentation is written to an in-memory stream rather than to disk;
    # rewind it so the caller can read it from the start.
    ppt_stream = io.BytesIO()
    ppt.save(ppt_stream)
    ppt_stream.seek(0)

    return ppt_stream
95 | 


--------------------------------------------------------------------------------
/todo.md:
--------------------------------------------------------------------------------
1 | # To Do
2 | - figure out a method to set font color in master slide itself
3 | - understand regex better and implement that for text extraction functions
4 | - add text format validation and retry loop ( again regex here )
5 | - Optional: at each step of content generation, do a ddg search for the appropriate content, fetch the top 3 results, and use RAG on them
6 | - Add text autofit for large paragraphs (temp fix: reduce points to 3)


--------------------------------------------------------------------------------