├── .gitignore
├── .gitmodules
├── LICENSE
├── README.md
├── app.py
├── fonts
    ├── Arial.ttf
    ├── ArialBd.ttf
    ├── ArialBdIt.ttf
    ├── ArialIt.ttf
    ├── ComicSansMS.ttf
    ├── ComicSansMSBd.ttf
    ├── CourierNew.ttf
    ├── Georgia.ttf
    ├── GeorgiaBd.ttf
    ├── GeorgiaBdIt.ttf
    ├── GeorgiaIt.ttf
    ├── Helvetica.ttf
    ├── HelveticaBd.ttf
    ├── HelveticaNeue.ttf
    ├── HelveticaNeueBd.ttf
    ├── LucidaSansUnicode.ttf
    ├── README
    ├── Tahoma.ttf
    ├── TahomaBd.ttf
    ├── TimesNewRoman.ttf
    ├── TimesNewRomanBd.ttf
    ├── TimesNewRomanBdIt.ttf
    ├── TimesNewRomanIt.ttf
    ├── TrebuchetMS.ttf
    ├── TrebuchetMSBd.ttf
    ├── TrebuchetMSBdIt.ttf
    ├── TrebuchetMSIt.ttf
    ├── Verdana.ttf
    ├── VerdanaBd.ttf
    ├── VerdanaBdIt.ttf
    ├── VerdanaIt.ttf
    └── some-nice-fonts.spec
├── main.py
├── requirements.txt
└── src
    ├── __init__.py
    ├── constructor.py
    ├── font.py
    ├── gigachat.py
    ├── kandinsky.py
    ├── llm_utils.py
    ├── prompt_configs
        ├── __init__.py
        ├── en_gigachat_config.py
        ├── prompt_config.py
        └── ru_gigachat_config.py
    └── slides
        ├── __init__.py
        ├── generate_slide.py
        ├── image_slide.py
        ├── plain_text_slide.py
        ├── slide_utils.py
        └── title_slide.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | ngrok-v3-stable-linux-s390x.tgz
  6 | 
  7 | # C extensions
  8 | *.so
  9 | 
 10 | # Distribution / packaging
 11 | .Python
 12 | build/
 13 | develop-eggs/
 14 | dist/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | 
 28 | # Jupyter Notebook checkpoints
 29 | **/.ipynb_checkpoints/
 30 | 
 31 | # Images
 32 | images/
 33 | *.png
 34 | *.jpg
 35 | *.jpeg
 36 | *.bmp
 37 | *.gif
 38 | *.tiff
 39 | *.ico
 40 | *.svg
 41 | *.eps
 42 | 
 43 | # Model weights and checkpoints
 44 | *.h5
 45 | *.hdf5
 46 | *.ckpt
 47 | *.pth
 48 | *.pt
 49 | *.onnx
 50 | 
 51 | # Python virtual environment
 52 | venv/
 53 | ENV/
 54 | env/
 55 | env.bak/
 56 | venv.bak/
 57 | 
 58 | # Editor directories and files
 59 | .vscode/
 60 | .idea/
 61 | *.sublime-project
 62 | *.sublime-workspace
 63 | 
 64 | # OS-specific files
 65 | .DS_Store
 66 | Thumbs.db
 67 | 
 68 | # Logs
 69 | logs/
 70 | *.log
 71 | 
 72 | # Generated files
 73 | *.bak
 74 | *.tmp
 75 | *.swp
 76 | *.swo
 77 | 
 78 | # Secret files
 79 | .env
 80 | .secret
 81 | 
 82 | # TensorBoard log files
 83 | runs/
 84 | events.out.tfevents.*
 85 | 
 86 | # Dataset files
 87 | *.csv
 88 | *.tsv
 89 | *.json
 90 | *.xml
 91 | *.zip
 92 | *.tar
 93 | *.tar.gz
 94 | *.tgz
 95 | *.bz2
 96 | *.7z
 97 | 
 98 | # Miscellaneous
 99 | *.ipynb
100 | 
101 | # Ignore data directories
102 | data/
103 | dataset/
104 | datasets/
105 | 
106 | # Ignore model directories
107 | models/
108 | weights/
109 | checkpoints/


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "Kandinsky-3"]
2 | 	path = Kandinsky-3
3 | 	url = https://github.com/ai-forever/Kandinsky-3.git
4 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Said Azizov
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | 
  2 | # README
  3 | 
  4 | ## Overview
  5 | 
  6 | This project generates a PowerPoint presentation based on user-provided descriptions. It leverages language models to generate text content and an image generation API to create images for the slides. The architecture is modular, allowing for easy extension and customization of the text and image generation components.
  7 | 
  8 | ## How to Use
  9 | 
 10 | ### Prerequisites
 11 | 
 12 | - Python 3.10 or higher
 13 | - Required Python packages (listed in `requirements.txt`)
 14 | 
 15 | ### Setup
 16 | 
 17 | 1. **Clone the repository**:
 18 | 
 19 |    ```bash
 20 |    git clone --recurse-submodules https://github.com/ai-forever/slides_generator.git
 21 |    cd slides_generator
 22 |    ```
 23 | 
 24 | 2. **Install dependencies**:
 25 | 
 26 |    ```bash
 27 |    pip install -r requirements.txt
 28 |    ```
 29 | 
 30 | 3. **Create a .env file** in the root directory with GigaChat credentials:
 31 | 
 32 | Here is the [documentation](https://developers.sber.ru/portal/products/gigachat-api) on how to obtain an access token.
 33 | 
 34 |    ```plaintext
 35 |    AUTH_TOKEN=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 36 |    COOKIE=XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 37 |    ```
 38 | 
 39 | 
 40 | 4. **Run the FastAPI server** for the image generation API:
 41 | 
 42 |    ```bash
 43 |    python src/kandinsky.py
 44 |    ```
 45 | 
 46 | ### Running the Script
 47 | 
 48 | To generate a presentation, use the following command:
 49 | 
 50 | ```bash
 51 | python main.py -d "Description of the presentation"  -l 'en'
 52 | ```
 53 | 
 54 | This will generate a presentation based on the provided description and save it in the `logs` directory with a timestamp.
 55 | 
 56 | ## Examples
 57 | 
 58 | ```bash
 59 | python main.py -d "Сгенерируй презентацию про планеты солнечной системы" -l 'ru'
 60 | ```
 61 | 
 62 | ```bash
 63 | python main.py -d "Generate presentation about planets of Solar system" -l 'en'
 64 | ```
 65 | 
 66 | This command will create a presentation on the topic "Planets of the Solar System" using the configured text and image generation functions.
 67 | 
 68 | ## Architecture
 69 | 
 70 | ### Main Components
 71 | 
 72 | 1. **main.py**: The entry point of the application. It parses command-line arguments, initializes required components, and orchestrates the presentation generation process.
 73 | 
 74 | 2. **Font Class (src/font.py)**: Manages fonts used in the presentation. It can select a random font with basic and bold styles and provide paths to various font styles (basic, bold, italic, and italic bold).
 75 | 
 76 | 3. **Presentation Generation Functions (src/constructor.py)**: Functions that generate different types of slides in the presentation. They handle the layout, font settings, and placement of text and images.
 77 | 
 78 | 4. **Text Generation (src/gigachat.py)**: Contains the `giga_generate` function, which generates text based on a given prompt.
 79 | 
 80 | 5. **Image Generation (src/kandinsky.py)**: Includes the `api_k31_generate` function, which generates images based on a prompt using an external API. Additionally, it provides a FastAPI server for the image generation API.
 81 | 
 82 | 6. **Prompt Configuration (src/prompt_configs/)**: Defines the structure of prompts used for generating titles, text, images, and backgrounds for slides.
 83 | 
 84 | ### How It Works
 85 | 
 86 | 1. **Initialization**:
 87 |     - `main.py` parses command-line arguments to get the presentation description.
 88 |     - It initializes the `Font` class with the directory containing font files and sets a random font.
 89 | 
 90 | 2. **Prompt Configuration**:
 91 |     - The `en_gigachat_config` and `ru_gigachat_config` prompt configurations (chosen by the language option) define the structure and content of prompts used for generating slide components (titles, text, images, backgrounds).
 92 | 
 93 | 3. **Text and Image Generation**:
 94 |     - The `giga_generate` function generates text based on the provided description.
 95 |     - The `api_k31_generate` function generates images based on prompts using the FastAPI server.
 96 | 
 97 | 4. **Slide Generation**:
 98 |     - The `generate_presentation` function orchestrates the creation of slides by calling appropriate functions to generate text and images, and then formats them into slides.
 99 | 
100 | ## Extending the Project
101 | 
102 | ### Adding New Font Styles
103 | 
104 | To add new font styles, place the font files in the `fonts` directory and update the `Font` class if necessary to recognize the new styles.
105 | 
106 | ### Changing Text Generation
107 | 
108 | To use a different text generation function, replace the `giga_generate` function from `src/gigachat.py` or add a new function and update the call in `main.py`.
109 | 
110 | ### Changing Image Generation
111 | 
112 | To use a different image generation API, modify the `api_k31_generate` function in `src/kandinsky.py` or add a new function and update the call in `main.py`.
113 | 
114 | ## Acknowledgements
115 | 
116 | This project leverages the `python-pptx` library for PowerPoint generation, PIL for image processing, and other Python libraries for various functionalities. The text and image generation models are based on external APIs and language models.
117 | 
118 | ---
119 | 
120 | Feel free to reach out with any questions or suggestions!
121 | 
122 | ## Authors
123 | 
124 | + Said Azizov: [Github](https://github.com/stazizov), [Blog](https://t.me/said_azizau)
125 | 
126 | ## Citation
127 | 
128 | ```
129 | @misc{arkhipkin2023kandinsky,
130 |       title={Kandinsky 3.0 Technical Report}, 
131 |       author={Vladimir Arkhipkin and Andrei Filatov and Viacheslav Vasilev and Anastasia Maltseva and Said Azizov and Igor Pavlov and Julia Agafonova and Andrey Kuznetsov and Denis Dimitrov},
132 |       year={2023},
133 |       eprint={2312.03511},
134 |       archivePrefix={arXiv},
135 |       primaryClass={cs.CV}
136 | }
137 | ```
138 | 


--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
 1 | import gradio as gr
 2 | import time
 3 | from src.constructor import generate_presentation 
 4 | from src.prompt_configs import en_gigachat_config, ru_gigachat_config
 5 | from src.gigachat import giga_generate
 6 | from src.kandinsky import api_k31_generate
 7 | from src.font import Font
 8 | 
 9 | logs_dir = "logs"
10 | fonts_dir = "fonts"
11 | 
def create_presentation(description: str, language: str):
    """Generate a presentation and return the path handed to the UI.

    Args:
        description: Free-text description of the presentation to generate.
        language: Language selected in the UI. "Русский" or "Russian"
            selects the Russian prompt configuration; "English" and any
            other value select the English one.

    Returns:
        The string ``<logs_dir>/<unix timestamp>/presentation.pptx``, i.e.
        the file expected inside the output directory passed to
        ``generate_presentation``.
    """
    # Accept both the Russian-language label and its English spelling so that
    # either one selects the Russian prompts instead of silently falling back
    # to English.
    if language in ("Русский", "Russian"):
        prompt_config = ru_gigachat_config
    else:
        # "English", plus a default for unexpected values so the UI request
        # is not interrupted.
        prompt_config = en_gigachat_config

    font = Font(fonts_dir)
    font.set_random_font()

    # One output directory per request, named after the current Unix time.
    output_dir = f'{logs_dir}/{int(time.time())}'

    generate_presentation(
        llm_generate=giga_generate,
        generate_image=api_k31_generate,
        prompt_config=prompt_config,
        description=description,
        font=font,
        output_dir=output_dir,
    )

    filename = f'{output_dir}/presentation.pptx'

    return filename
39 | 
# Example rows passed to gr.Interface below. Each entry is a
# [description, language] pair, in the same order as the interface inputs.
# The language strings ("English" / "Русский") are the values that
# create_presentation() compares against.
examples = [
    ["Generate a presentation on economics, 7 slides", "English"],
    ["Сгенерируйте презентацию по экономике, 7 слайдов", "Русский"],
    ["Create a presentation on climate change, 6 slides", "English"],
    ["Создайте презентацию об изменении климата, 6 слайдов", "Русский"],
    ["Create a presentation on artificial intelligence, 8 slides", "English"],
    ["Создайте презентацию об искусственном интеллекте, 8 слайдов", "Русский"],
    ["Design a presentation on space exploration, 10 slides", "English"],
    ["Разработайте презентацию о космических исследованиях, 10 слайдов", "Русский"],
    ["Prepare a presentation on the future of renewable energy, 7 slides", "English"],
    ["Подготовьте презентацию о будущем возобновляемой энергетики, 7 слайдов", "Русский"],
    ["Develop a presentation on the history of art movements, 9 slides", "English"],
    ["Разработайте презентацию о истории художественных движений, 9 слайдов", "Русский"],
    ["Generate a presentation on the impact of social media, 6 slides", "English"],
    ["Сгенерируйте презентацию о влиянии социальных сетей, 6 слайдов", "Русский"],
    ["Create a presentation on sustainable urban planning, 8 slides", "English"],
    ["Создайте презентацию о устойчивом градостроительстве, 8 слайдов", "Русский"],
    ["Разработайте презентацию о новшествах в области медицинских технологий, 7 слайдов", "Русский"],
    ["Design a presentation on innovations in healthcare technology, 7 slides", "English"],
    ["Подготовьте презентацию о глобальных экономических тенденциях, 5 слайдов", "Русский"],
    ["Prepare a presentation on global economic trends, 5 slides", "English"],
    ["Разработайте презентацию о психологии потребительского поведения, 6 слайдов", "Русский"],
    ["Develop a presentation on the psychology of consumer behavior, 6 slides", "English"],
    ["Сгенерируйте презентацию о преимуществах осознанности и медитации, 7 слайдов", "Русский"],
    ["Generate a presentation on the benefits of mindfulness and meditation, 7 slides", "English"],
    ["Создайте презентацию о достижениях в области автономных транспортных средств, 8 слайдов", "Русский"],
    ["Create a presentation on advancements in autonomous vehicles, 8 slides", "English"],
    ["Разработайте презентацию о влиянии изменений климатической политики, 5 слайдов", "Русский"],
    ["Design a presentation on the impact of climate policy changes, 5 slides", "English"],
]
71 | 
# Web UI: a free-text description and a language selector as inputs, the
# path returned by create_presentation() offered as a downloadable file.
iface = gr.Interface(
    fn=create_presentation,
    inputs=[
        gr.Textbox(
            label="Presentation Description",
            placeholder="Enter the description for the presentation..."
        ),
        gr.Dropdown(
            label="Language",
            # Use the same value strings as the examples list, so every
            # example row refers to a valid choice and selecting Russian
            # makes create_presentation() pick the Russian prompts.
            choices=["English", "Русский"],
            value="English"
        )
    ],
    outputs=gr.File(
        label="Download Presentation"
    ),
    title="Presentation Generator",
    description="Generate a presentation based on the provided description and selected language. Click the button to download the presentation.",
    css="footer {visibility: hidden}",
    allow_flagging="never",
    examples=examples
)

# Start the Gradio server as soon as the module is executed.
iface.launch()


--------------------------------------------------------------------------------
/fonts/Arial.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-forever/slides_generator/5c614a03b014a233270256dac937e216cdff3977/fonts/Arial.ttf


--------------------------------------------------------------------------------
/fonts/ArialBd.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-forever/slides_generator/5c614a03b014a233270256dac937e216cdff3977/fonts/ArialBd.ttf


--------------------------------------------------------------------------------
/fonts/ArialBdIt.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-forever/slides_generator/5c614a03b014a233270256dac937e216cdff3977/fonts/ArialBdIt.ttf


--------------------------------------------------------------------------------
/fonts/ArialIt.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-forever/slides_generator/5c614a03b014a233270256dac937e216cdff3977/fonts/ArialIt.ttf


--------------------------------------------------------------------------------
/fonts/ComicSansMS.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-forever/slides_generator/5c614a03b014a233270256dac937e216cdff3977/fonts/ComicSansMS.ttf


--------------------------------------------------------------------------------
/fonts/ComicSansMSBd.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-forever/slides_generator/5c614a03b014a233270256dac937e216cdff3977/fonts/ComicSansMSBd.ttf


--------------------------------------------------------------------------------
/fonts/CourierNew.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-forever/slides_generator/5c614a03b014a233270256dac937e216cdff3977/fonts/CourierNew.ttf


--------------------------------------------------------------------------------
/fonts/Georgia.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-forever/slides_generator/5c614a03b014a233270256dac937e216cdff3977/fonts/Georgia.ttf


--------------------------------------------------------------------------------
/fonts/GeorgiaBd.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-forever/slides_generator/5c614a03b014a233270256dac937e216cdff3977/fonts/GeorgiaBd.ttf


--------------------------------------------------------------------------------
/fonts/GeorgiaBdIt.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-forever/slides_generator/5c614a03b014a233270256dac937e216cdff3977/fonts/GeorgiaBdIt.ttf


--------------------------------------------------------------------------------
/fonts/GeorgiaIt.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-forever/slides_generator/5c614a03b014a233270256dac937e216cdff3977/fonts/GeorgiaIt.ttf


--------------------------------------------------------------------------------
/fonts/Helvetica.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-forever/slides_generator/5c614a03b014a233270256dac937e216cdff3977/fonts/Helvetica.ttf


--------------------------------------------------------------------------------
/fonts/HelveticaBd.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-forever/slides_generator/5c614a03b014a233270256dac937e216cdff3977/fonts/HelveticaBd.ttf


--------------------------------------------------------------------------------
/fonts/HelveticaNeue.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-forever/slides_generator/5c614a03b014a233270256dac937e216cdff3977/fonts/HelveticaNeue.ttf


--------------------------------------------------------------------------------
/fonts/HelveticaNeueBd.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-forever/slides_generator/5c614a03b014a233270256dac937e216cdff3977/fonts/HelveticaNeueBd.ttf


--------------------------------------------------------------------------------
/fonts/LucidaSansUnicode.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-forever/slides_generator/5c614a03b014a233270256dac937e216cdff3977/fonts/LucidaSansUnicode.ttf


--------------------------------------------------------------------------------
/fonts/README:
--------------------------------------------------------------------------------
 1 | copr-some-nice-fonts
 2 | ====================
 3 | 
 4 | This is a Fedora/CentOS repo for easy installing of some nice fonts. This
 5 | includes the following fonts:
 6 | 
 7 | - Arial
 8 | - Comic Sans MS
 9 | - Courier New
10 | - Georgia
11 | - Helvetica Neue
12 | - Helvetica
13 | - Lucida Sans Unicode
14 | - Tahoma
15 | - Times New Roman
16 | - Trebuchet MS
17 | - Verdana
18 | 
19 | Using it
20 | --------
21 | 
22 | sudo dnf copr enable adrienverge/some-nice-fonts
23 | sudo dnf install some-nice-fonts
24 | 
25 | Building it
26 | -----------
27 | 
28 | cp *.ttf ~/rpmbuild/SOURCES && rpmbuild -ba some-nice-fonts.spec
29 | 


--------------------------------------------------------------------------------
/fonts/Tahoma.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-forever/slides_generator/5c614a03b014a233270256dac937e216cdff3977/fonts/Tahoma.ttf


--------------------------------------------------------------------------------
/fonts/TahomaBd.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-forever/slides_generator/5c614a03b014a233270256dac937e216cdff3977/fonts/TahomaBd.ttf


--------------------------------------------------------------------------------
/fonts/TimesNewRoman.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-forever/slides_generator/5c614a03b014a233270256dac937e216cdff3977/fonts/TimesNewRoman.ttf


--------------------------------------------------------------------------------
/fonts/TimesNewRomanBd.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-forever/slides_generator/5c614a03b014a233270256dac937e216cdff3977/fonts/TimesNewRomanBd.ttf


--------------------------------------------------------------------------------
/fonts/TimesNewRomanBdIt.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-forever/slides_generator/5c614a03b014a233270256dac937e216cdff3977/fonts/TimesNewRomanBdIt.ttf


--------------------------------------------------------------------------------
/fonts/TimesNewRomanIt.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-forever/slides_generator/5c614a03b014a233270256dac937e216cdff3977/fonts/TimesNewRomanIt.ttf


--------------------------------------------------------------------------------
/fonts/TrebuchetMS.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-forever/slides_generator/5c614a03b014a233270256dac937e216cdff3977/fonts/TrebuchetMS.ttf


--------------------------------------------------------------------------------
/fonts/TrebuchetMSBd.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-forever/slides_generator/5c614a03b014a233270256dac937e216cdff3977/fonts/TrebuchetMSBd.ttf


--------------------------------------------------------------------------------
/fonts/TrebuchetMSBdIt.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-forever/slides_generator/5c614a03b014a233270256dac937e216cdff3977/fonts/TrebuchetMSBdIt.ttf


--------------------------------------------------------------------------------
/fonts/TrebuchetMSIt.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-forever/slides_generator/5c614a03b014a233270256dac937e216cdff3977/fonts/TrebuchetMSIt.ttf


--------------------------------------------------------------------------------
/fonts/Verdana.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-forever/slides_generator/5c614a03b014a233270256dac937e216cdff3977/fonts/Verdana.ttf


--------------------------------------------------------------------------------
/fonts/VerdanaBd.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-forever/slides_generator/5c614a03b014a233270256dac937e216cdff3977/fonts/VerdanaBd.ttf


--------------------------------------------------------------------------------
/fonts/VerdanaBdIt.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-forever/slides_generator/5c614a03b014a233270256dac937e216cdff3977/fonts/VerdanaBdIt.ttf


--------------------------------------------------------------------------------
/fonts/VerdanaIt.ttf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-forever/slides_generator/5c614a03b014a233270256dac937e216cdff3977/fonts/VerdanaIt.ttf


--------------------------------------------------------------------------------
/fonts/some-nice-fonts.spec:
--------------------------------------------------------------------------------
  1 | # Copyright 2017 Adrien Vergé
  2 | 
  3 | Name:      some-nice-fonts
  4 | Version:   2.0.0
  5 | Release:   1%{?dist}
  6 | Summary:   Some nice fonts including Arial, Courier New, Helvetica, etc.
  7 | License:   Proprietary
  8 | URL:       https://github.com/adrienverge/copr-some-nice-fonts
  9 | Source0:   Arial.ttf
 10 | Source1:   ArialBd.ttf
 11 | Source2:   ArialIt.ttf
 12 | Source3:   ArialBdIt.ttf
 13 | Source4:   ComicSansMS.ttf
 14 | Source5:   ComicSansMSBd.ttf
 15 | Source6:   CourierNew.ttf
 16 | Source7:   Georgia.ttf
 17 | Source8:   GeorgiaBd.ttf
 18 | Source9:   GeorgiaIt.ttf
 19 | Source10:  GeorgiaBdIt.ttf
 20 | Source11:  Helvetica.ttf
 21 | Source12:  HelveticaBd.ttf
 22 | Source13:  HelveticaNeue.ttf
 23 | Source14:  HelveticaNeueBd.ttf
 24 | Source15:  LucidaSansUnicode.ttf
 25 | Source16:  Tahoma.ttf
 26 | Source17:  TahomaBd.ttf
 27 | Source18:  TimesNewRoman.ttf
 28 | Source19:  TimesNewRomanBd.ttf
 29 | Source20:  TimesNewRomanIt.ttf
 30 | Source21:  TimesNewRomanBdIt.ttf
 31 | Source22:  TrebuchetMS.ttf
 32 | Source23:  TrebuchetMSBd.ttf
 33 | Source24:  TrebuchetMSIt.ttf
 34 | Source25:  TrebuchetMSBdIt.ttf
 35 | Source26:  Verdana.ttf
 36 | Source27:  VerdanaBd.ttf
 37 | Source28:  VerdanaIt.ttf
 38 | Source29:  VerdanaBdIt.ttf
 39 | 
 40 | BuildArch: noarch
 41 | BuildRequires: fontpackages-devel
 42 | 
 43 | %description
 44 | This package provides the following fonts:
 45 | - Arial
 46 | - Comic Sans MS
 47 | - Courier New
 48 | - Georgia
 49 | - Helvetica Neue
 50 | - Helvetica
 51 | - Lucida Sans Unicode
 52 | - Tahoma
 53 | - Times New Roman
 54 | - Trebuchet MS
 55 | - Verdana
 56 | 
 57 | 
 58 | %_font_pkg -n some-nice-fonts *.ttf
 59 | 
 60 | 
 61 | %prep
 62 | %setup -q -c -T -n some-nice-fonts-%{version}
 63 | 
 64 | 
 65 | %install
 66 | rm -rf %{buildroot}
 67 | install -m 0755 -d %{buildroot}%{_fontdir}
 68 | install -m 0644 -p %{SOURCE0} %{buildroot}%{_fontdir}
 69 | install -m 0644 -p %{SOURCE1} %{buildroot}%{_fontdir}
 70 | install -m 0644 -p %{SOURCE2} %{buildroot}%{_fontdir}
 71 | install -m 0644 -p %{SOURCE3} %{buildroot}%{_fontdir}
 72 | install -m 0644 -p %{SOURCE4} %{buildroot}%{_fontdir}
 73 | install -m 0644 -p %{SOURCE5} %{buildroot}%{_fontdir}
 74 | install -m 0644 -p %{SOURCE6} %{buildroot}%{_fontdir}
 75 | install -m 0644 -p %{SOURCE7} %{buildroot}%{_fontdir}
 76 | install -m 0644 -p %{SOURCE8} %{buildroot}%{_fontdir}
 77 | install -m 0644 -p %{SOURCE9} %{buildroot}%{_fontdir}
 78 | install -m 0644 -p %{SOURCE10} %{buildroot}%{_fontdir}
 79 | install -m 0644 -p %{SOURCE11} %{buildroot}%{_fontdir}
 80 | install -m 0644 -p %{SOURCE12} %{buildroot}%{_fontdir}
 81 | install -m 0644 -p %{SOURCE13} %{buildroot}%{_fontdir}
 82 | install -m 0644 -p %{SOURCE14} %{buildroot}%{_fontdir}
 83 | install -m 0644 -p %{SOURCE15} %{buildroot}%{_fontdir}
 84 | install -m 0644 -p %{SOURCE16} %{buildroot}%{_fontdir}
 85 | install -m 0644 -p %{SOURCE17} %{buildroot}%{_fontdir}
 86 | install -m 0644 -p %{SOURCE18} %{buildroot}%{_fontdir}
 87 | install -m 0644 -p %{SOURCE19} %{buildroot}%{_fontdir}
 88 | install -m 0644 -p %{SOURCE20} %{buildroot}%{_fontdir}
 89 | install -m 0644 -p %{SOURCE21} %{buildroot}%{_fontdir}
 90 | install -m 0644 -p %{SOURCE22} %{buildroot}%{_fontdir}
 91 | install -m 0644 -p %{SOURCE23} %{buildroot}%{_fontdir}
 92 | install -m 0644 -p %{SOURCE24} %{buildroot}%{_fontdir}
 93 | install -m 0644 -p %{SOURCE25} %{buildroot}%{_fontdir}
 94 | install -m 0644 -p %{SOURCE26} %{buildroot}%{_fontdir}
 95 | install -m 0644 -p %{SOURCE27} %{buildroot}%{_fontdir}
 96 | install -m 0644 -p %{SOURCE28} %{buildroot}%{_fontdir}
 97 | install -m 0644 -p %{SOURCE29} %{buildroot}%{_fontdir}
 98 | 
 99 | 
100 | %changelog
101 | * Mon Nov 20 2017 Adrien Vergé <adrienverge@gmail.com>
102 | - Add bold and italic versions when available
103 | 
104 | * Fri Jun 23 2017 Adrien Vergé <adrienverge@gmail.com>
105 | - Initial package spec
106 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | import argparse
 3 | from src.constructor import generate_presentation 
 4 | from src.prompt_configs import en_gigachat_config, ru_gigachat_config
 5 | from src.gigachat import giga_generate
 6 | from src.kandinsky import api_k31_generate
 7 | from src.font import Font
 8 | 
 9 | def main():
10 |     parser = argparse.ArgumentParser(
11 |         description='Generate a presentation.'
12 |     )
13 |     parser.add_argument(
14 |         '-d', '--description', 
15 |         type=str, 
16 |         required=True, 
17 |         help='Description of the presentation'
18 |     )
19 |     parser.add_argument(
20 |         '-l', '--language', 
21 |         type=str, 
22 |         choices=['en', 'ru'], 
23 |         default='en', 
24 |         help='Language for the presentation. Choices are: English, Russian. Default is English.'
25 |     )
26 |     args = parser.parse_args()
27 | 
28 |     # Select the appropriate prompt configuration based on the language argument
29 |     if args.language == 'en':
30 |         prompt_config = en_gigachat_config
31 |     elif args.language == 'ru':
32 |         prompt_config = ru_gigachat_config
33 |     else: 
34 |         # set default to prevent interruptions in unexpected scenario
35 |         print("only 'en' and 'ru' configs are available, settings default 'en'")
36 |         prompt_config = en_gigachat_config
37 | 
38 |     fonts_dir = "./fonts"
39 |     logs_dir = "./logs"
40 |     
41 |     font = Font(fonts_dir)
42 |     font.set_random_font() 
43 |     
44 |     output_dir = f'{logs_dir}/{int(time.time())}'
45 |     
46 |     generate_presentation(
47 |         llm_generate=giga_generate, 
48 |         generate_image=api_k31_generate,
49 |         prompt_config=prompt_config,    
50 |         description=args.description,
51 |         font=font,
52 |         output_dir=output_dir,
53 |     )
54 | 
55 | if __name__ == "__main__": 
56 |     main()


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | googletrans==4.0.0-rc1
 2 | httpx
 3 | fastapi
 4 | gradio
 5 | huggingface_hub
 6 | numpy
 7 | Pillow
 8 | pydantic
 9 | python-dotenv
10 | python_pptx
11 | Requests
12 | scipy
13 | setuptools
14 | scikit-image
15 | torch
16 | torchvision
17 | tqdm
18 | uvicorn
19 | 
20 | einops
21 | accelerate
22 | sentencepiece
23 | -r Kandinsky-3/requirements.txt


--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ai-forever/slides_generator/5c614a03b014a233270256dac937e216cdff3977/src/__init__.py


--------------------------------------------------------------------------------
/src/constructor.py:
--------------------------------------------------------------------------------
  1 | from pptx import Presentation
  2 | from pptx.util import Inches
  3 | from pptx.oxml.xmlchemy import OxmlElement
  4 | from pptx.dml.color import RGBColor
  5 | from pptx.enum.text import PP_ALIGN, MSO_ANCHOR, MSO_AUTO_SIZE
  6 | 
  7 | import random
  8 | import os
  9 | from PIL import Image
 10 | from typing import List, Callable
 11 | 
 12 | from .llm_utils import llm_generate_titles, llm_generate_text, llm_generate_image_prompt, llm_generate_background_prompt
 13 | from .prompt_configs import PromptConfig
 14 | from .slides import generate_slide
 15 | from .font import Font
 16 | 
 17 | import tqdm
 18 | 
 19 | 
 20 | def generate_presentation(
 21 |     llm_generate: Callable[[str], str],
 22 |     generate_image: Callable[[str, int, int], Image.Image],
 23 |     prompt_config: PromptConfig,
 24 |     description: str,
 25 |     font:Font, 
 26 |     output_dir: str,
 27 | ) -> Presentation:
 28 |     """
 29 |     Generate a PowerPoint presentation based on a description using language and image models.
 30 | 
 31 |     Args:
 32 |         llm_generate (Callable[[str], str]): Function to generate text using a language model.
 33 |         generate_image (Callable[[str, int, int], Image.Image]): Function to generate images.
 34 |         prompt_config (PromptConfig): Configuration for prompts.
 35 |         description (str): Description of the presentation.
 36 |         output_dir (str): Directory to save generated images and presentation.
 37 |         font (Font): Font object to manage font styles and paths.
 38 |     Returns:
 39 |         Presentation: The generated PowerPoint presentation.
 40 |     """
 41 |     os.makedirs(os.path.join(output_dir, 'backgrounds'), exist_ok=True)
 42 |     os.makedirs(os.path.join(output_dir, 'pictures'), exist_ok=True)
 43 |     presentation = Presentation()
 44 |     presentation.slide_height = Inches(9)
 45 |     presentation.slide_width = Inches(16)
 46 | 
 47 |     pbar = tqdm.tqdm(total=4, desc="Presentation goes brrr...")
 48 |     
 49 |     pbar.set_description("Generating titles for presentation")
 50 |     titles = llm_generate_titles(llm_generate, description, prompt_config)
 51 |     pbar.update(1)
 52 |     
 53 |     pbar.set_description("Generating text for slides")
 54 |     texts = [None] + llm_generate_text(
 55 |         llm_generate, 
 56 |         description, 
 57 |         titles[1:], 
 58 |         prompt_config
 59 |     )
 60 |     pbar.update(1)
 61 | 
 62 |     # postfix added to keywords describing presentation
 63 |     background_style = random.choice(prompt_config.background_styles)
 64 |     
 65 |     picture_paths = []
 66 |     background_paths = []
 67 |     pbar.set_description("Generating images for slides")
 68 |     for t_index, (title, text) in enumerate(zip(titles, texts)):
 69 |         # Decide randomly presence of image on current slide
 70 |         if random.choices(
 71 |             [True, False], 
 72 |             # side-image/plain-text with background image
 73 |             weights=[4, 1], 
 74 |         k=1)[0] and text:
 75 |             image_width, image_height = random.choice(
 76 |                 [(768, 1344), (1024, 1024)]
 77 |             )
 78 |             caption_prompt = llm_generate_image_prompt(
 79 |                 llm_generate, 
 80 |                 description, 
 81 |                 title, 
 82 |                 prompt_config
 83 |             )
 84 |             picture = generate_image(
 85 |                 prompt=caption_prompt, 
 86 |                 width=image_width, 
 87 |                 height=image_height
 88 |             )
 89 |             picture_path = os.path.join(
 90 |                 output_dir, 
 91 |                 'pictures', 
 92 |                 f'{t_index:06}.png'
 93 |             )
 94 |             picture.save(picture_path)
 95 |         else:
 96 |             picture_path = None
 97 |         picture_paths.append(picture_path)
 98 | 
 99 |         if picture_path is None:
100 |             background_width, background_height = 1344, 768
101 |             background_prompt = llm_generate_background_prompt(
102 |                 llm_generate, 
103 |                 description, 
104 |                 title, 
105 |                 prompt_config,
106 |                 background_style
107 |             )
108 |             background = generate_image(
109 |                 prompt=background_prompt, 
110 |                 width=background_width, 
111 |                 height=background_height
112 |             )
113 |             background_path = os.path.join(
114 |                 output_dir, 
115 |                 'backgrounds', 
116 |                 f'{t_index:06}.png'
117 |             )
118 |             background.save(background_path)
119 |         else:
120 |             background_path = None
121 |         background_paths.append(background_path)
122 |     pbar.update(1)
123 |     
124 |     pbar.set_description("Packing presentation")
125 |     
126 |     for index in range(len(titles)):
127 |         title = titles[index]
128 |         text = texts[index]
129 |         picture_path = picture_paths[index]
130 |         background_path = background_paths[index]
131 | 
132 |         generate_slide(
133 |             presentation=presentation,
134 |             title=title,
135 |             text=text,
136 |             picture_path=picture_path,
137 |             background_path=background_path,
138 |             font=font,
139 |         )
140 |     pbar.update(1)
141 |     
142 |     pbar.set_description("Done")
143 |     output_path = os.path.join(output_dir, 'presentation.pptx')
144 |     presentation.save(output_path)
145 |     return presentation


--------------------------------------------------------------------------------
/src/font.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import random
  3 | from typing import Optional
  4 | 
  5 | class Font:
  6 |     def __init__(self, fonts_dir: str, max_size: int = 66):
  7 |         """
  8 |         Initialize the Font class with a directory containing font files.
  9 | 
 10 |         Args:
 11 |             fonts_dir (str): Path to the directory containing font files.
 12 |             max_size (int): Maximum font size to use for fitting text.
 13 |         """
 14 |         self.fonts_dir = fonts_dir
 15 |         self.font_name = None  # Default font
 16 |         self.set_random_font()
 17 |         self.max_size = max_size
 18 | 
 19 |     def set_font(self, font_name: str = "Tahoma") -> None:
 20 |         """
 21 |         Set the font name to be used.
 22 | 
 23 |         Args:
 24 |             font_name (str): Name of the font to set (default is "Tahoma").
 25 |         """
 26 |         if self._find_font(font_name):
 27 |             self.font_name = font_name
 28 |         else:
 29 |             raise ValueError(f"Font '{font_name}' not found in '{self.fonts_dir}'.")
 30 | 
 31 |     def set_random_font(self) -> None:
 32 |         """
 33 |         Set a random font from the fonts directory. The chosen font must have both
 34 |         basic and bold styles available.
 35 |         """
 36 |         available_fonts = self._find_available_fonts()
 37 |         if not available_fonts:
 38 |             raise ValueError("No fonts with both basic and bold styles found.")
 39 | 
 40 |         self.font_name = random.choice(available_fonts)
 41 | 
 42 |     @property
 43 |     def basic(self) -> Optional[str]:
 44 |         """
 45 |         Get the path of the basic font style based on the current font name.
 46 | 
 47 |         Returns:
 48 |             Optional[str]: The full path to the basic font style or None if not found.
 49 |         """
 50 |         return self._find_font(f'{self.font_name}')
 51 | 
 52 |     @property
 53 |     def bold(self) -> Optional[str]:
 54 |         """
 55 |         Get the path of the bold font style based on the current font name.
 56 | 
 57 |         Returns:
 58 |             Optional[str]: The full path to the bold font style or None if not found.
 59 |         """
 60 |         return self._find_font(f'{self.font_name}Bd')
 61 | 
 62 |     @property
 63 |     def italic(self) -> Optional[str]:
 64 |         """
 65 |         Get the path of the italic font style based on the current font name.
 66 | 
 67 |         Returns:
 68 |             Optional[str]: The full path to the italic font style or None if not found.
 69 |         """
 70 |         return self._find_font(f'{self.font_name}It')
 71 | 
 72 |     @property
 73 |     def italic_bold(self) -> Optional[str]:
 74 |         """
 75 |         Get the path of the italic bold font style based on the current font name.
 76 | 
 77 |         Returns:
 78 |             Optional[str]: The full path to the italic bold 
 79 |                         font style or None if not found.
 80 |         """
 81 |         return self._find_font(f'{self.font_name}BdIt')
 82 | 
 83 |     def _find_font(self, font_name: str) -> Optional[str]:
 84 |         """
 85 |         Find a font file in the fonts directory by font name.
 86 | 
 87 |         Args:
 88 |             font_name (str): The font name to find.
 89 | 
 90 |         Returns:
 91 |             Optional[str]: The full path to the font file if found, None otherwise.
 92 |         """
 93 |         if not font_name.endswith(".ttf"):
 94 |             font_name = f'{font_name}.ttf'
 95 |             
 96 |         for filename in os.listdir(self.fonts_dir):
 97 |             if font_name == filename:
 98 |                 file_path = os.path.join(self.fonts_dir, filename)
 99 |                 return file_path
100 |         return None
101 | 
102 |     def _find_available_fonts(self) -> list:
103 |         """
104 |         Find all available fonts in the fonts directory 
105 |         that have both basic and bold styles.
106 | 
107 |         Returns:
108 |             list: A list of font names (without file extension) 
109 |                     that have both basic and bold styles.
110 |         """
111 |         fonts = set()
112 |         for filename in os.listdir(self.fonts_dir):
113 |             if filename.endswith(".ttf"):
114 |                 font_name = filename[:-4]  # Remove the .ttf extension
115 |                 if font_name.endswith("Bd"):
116 |                     basic_font = font_name[:-2]
117 |                     if os.path.exists(os.path.join(self.fonts_dir, f"{basic_font}.ttf")):
118 |                         fonts.add(basic_font)
119 |                 else:
120 |                     bold_font = f"{font_name}Bd"
121 |                     if os.path.exists(os.path.join(self.fonts_dir, f"{bold_font}.ttf")):
122 |                         fonts.add(font_name)
123 |         return list(fonts)


--------------------------------------------------------------------------------
/src/gigachat.py:
--------------------------------------------------------------------------------
  1 | import requests
  2 | import base64
  3 | import uuid
  4 | import json
  5 | import time
  6 | from typing import Dict, Optional, Any
  7 | from dotenv import load_dotenv
  8 | import os
  9 | 
# Load environment variables from .env file
load_dotenv()

# Credentials used by get_auth_token(); each is None when the variable is unset.
AUTH_TOKEN = os.getenv("AUTH_TOKEN")  # sent as the "Basic" Authorization value
COOKIE = os.getenv("COOKIE")  # sent as the Cookie header

# NOTE: both values are secrets - avoid printing them when debugging.
 18 | 
 19 | def get_auth_token(timeout: float = 2) -> Dict[str, Any]:
 20 |     """
 21 |     Get authentication token.
 22 | 
 23 |     Args:
 24 |         timeout (float): Timeout duration in seconds.
 25 | 
 26 |     Returns:
 27 |         Dict[str, Any]: Dictionary containing the access token and its expiration time.
 28 |     """
 29 |     url = "https://beta.saluteai.sberdevices.ru/v1/token"
 30 |     payload = 'scope=GIGACHAT_API_CORP'
 31 |     headers = {
 32 |         'Content-Type': 'application/x-www-form-urlencoded',
 33 |         'Accept': 'application/json',
 34 |         'RqUID': str(uuid.uuid4()),
 35 |         'Cookie': COOKIE,
 36 |         'Authorization': f'Basic {AUTH_TOKEN}'
 37 |     }
 38 |     response = requests.post(url, headers=headers, data=payload, timeout=timeout)
 39 |     response_dict = response.json()
 40 |     return {
 41 |         'access_token': response_dict['tok'],
 42 |         'expires_at': response_dict['exp']
 43 |     }
 44 | 
 45 | def check_auth_token(token_data: Dict[str, Any]) -> bool:
 46 |     """
 47 |     Check if the authentication token is valid.
 48 | 
 49 |     Args:
 50 |         token_data (Dict[str, Any]): Dictionary containing token data.
 51 | 
 52 |     Returns:
 53 |         bool: True if the token is valid, False otherwise.
 54 |     """
 55 |     return token_data['expires_at'] - time.time() > 5
 56 | 
 57 | token_data: Optional[Dict[str, Any]] = None
 58 | 
 59 | def get_response(
 60 |     prompt: str,
 61 |     model: str,
 62 |     timeout: int = 120,
 63 |     n: int = 1,
 64 |     fuse_key_word: Optional[str] = None,
 65 |     use_giga_censor: bool = False,
 66 |     max_tokens: int = 512,
 67 | ) -> requests.Response:
 68 |     """
 69 |     Send a text generation request to the API.
 70 | 
 71 |     Args:
 72 |         prompt (str): The input prompt.
 73 |         model (str): The model to be used for generation.
 74 |         timeout (int): Timeout duration in seconds.
 75 |         n (int): Number of responses.
 76 |         fuse_key_word (Optional[str]): Additional keyword to include in the prompt.
 77 |         use_giga_censor (bool): Whether to use profanity filtering.
 78 |         max_tokens (int): Maximum number of tokens in the response.
 79 | 
 80 |     Returns:
 81 |         requests.Response: API response.
 82 |     """
 83 |     global token_data
 84 |     
 85 |     url = "https://beta.saluteai.sberdevices.ru/v1/chat/completions"
 86 |     payload = json.dumps({
 87 |         "model": model,
 88 |         "messages": [
 89 |             {
 90 |                 "role": "user",
 91 |                 "content": ' '.join([fuse_key_word, prompt]) if fuse_key_word else prompt
 92 |             }
 93 |         ],
 94 |         "temperature": 0.87,
 95 |         "top_p": 0.47,
 96 |         "n": n,
 97 |         "stream": False,
 98 |         "max_tokens": max_tokens,
 99 |         "repetition_penalty": 1.07,
100 |         "profanity_check": use_giga_censor
101 |     })
102 | 
103 |     if token_data is None or not check_auth_token(token_data): 
104 |         token_data = get_auth_token()
105 |     
106 |     headers = {
107 |         'Content-Type': 'application/json',
108 |         'Accept': 'application/json',
109 |         'Authorization': f'Bearer {token_data["access_token"]}'
110 |     }
111 |     response = requests.post(url, headers=headers, data=payload, timeout=timeout)
112 |     return response
113 |    
def giga_generate(
    prompt: str,
    model_version: str = "GigaChat-Pro",
    max_tokens: int = 2048
) -> str:
    """
    Generate text using the GigaChat model.

    Args:
        prompt (str): The input prompt.
        model_version (str): The version of the model to use.
        max_tokens (int): Maximum number of tokens in the response.

    Returns:
        str: Generated text, or the literal 'Censored Text' when the first
            choice finished with the reason 'blacklist'.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
    """
    response = get_response(
        prompt,
        model_version,
        use_giga_censor=False,
        max_tokens=max_tokens,
    )
    # Report a failed request as an HTTP error rather than a KeyError
    # on the missing 'choices' field below.
    response.raise_for_status()
    first_choice = response.json()['choices'][0]

    if first_choice['finish_reason'] == 'blacklist':
        print('GigaCensor triggered!')
        return 'Censored Text'
    return first_choice['message']['content']


--------------------------------------------------------------------------------
/src/kandinsky.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | sys.path.append('Kandinsky-3')
 3 | 
 4 | import torch
 5 | from kandinsky3 import get_T2I_pipeline
 6 | from fastapi import FastAPI, HTTPException
 7 | from pydantic import BaseModel
 8 | from typing import Optional
 9 | import base64
10 | from io import BytesIO
11 | from PIL import Image
12 | import uvicorn
13 | 
14 | import time
15 | from fastapi import FastAPI, HTTPException
16 | from pydantic import BaseModel
17 | import base64
18 | import requests
19 | 
# Device handed to get_T2I_pipeline(): the first CUDA GPU
device_map = torch.device('cuda:0')
# Per-component dtypes handed to get_T2I_pipeline()
dtype_map = {
    'unet': torch.float32,
    'text_encoder': torch.float16,
    'movq': torch.float32,
}

# Initialize the FastAPI app
app = FastAPI()
29 | 
# Define the request model: body of POST /k31/
class GenerateImageRequest(BaseModel):
    # Text prompt passed to the text-to-image pipeline
    prompt: str
    # Requested image size in pixels; both default to 1024
    width: Optional[int] = 1024
    height: Optional[int] = 1024
35 | 
# Define the response model: reply of POST /k31/
class GenerateImageResponse(BaseModel):
    # PNG-encoded image as a base64 string
    image_base64: str
39 | 
# Define the endpoint
@app.post("/k31/", response_model=GenerateImageResponse)
async def generate_image(request: GenerateImageRequest):
    """
    Generate an image for the given prompt and return it as base64-encoded PNG.

    Any failure is reported to the client as HTTP 500 with the error text
    as detail.
    """
    # NOTE(review): t2i_pipe is a module global that is only assigned in the
    # __main__ section of this file, and the pipeline call is synchronous
    # inside an async handler - confirm both are intended.
    try:
        # Generate the image using the pipeline
        pil_image = t2i_pipe(request.prompt, width=request.width, height=request.height, steps=50)[0]

        # Resize the image if necessary
        if pil_image.size != (request.width, request.height):
            pil_image = pil_image.resize((request.width, request.height))

        # Convert the PIL image to base64
        buffered = BytesIO()
        pil_image.save(buffered, format="PNG")
        image_base64 = base64.b64encode(buffered.getvalue()).decode('utf-8')

        # Return the response
        return GenerateImageResponse(image_base64=image_base64)

    except Exception as e:
        # Chain the original exception so its traceback stays attached
        raise HTTPException(status_code=500, detail=str(e)) from e
61 | 
def api_k31_generate(prompt, width=1024, height=1024, url = "http://0.0.0.0:8188/k31/"):
    """
    Ask the image generation service for a picture and decode the reply.

    Args:
        prompt: Text prompt for the image.
        width: Requested image width.
        height: Requested image height.
        url: Address of the generation endpoint.

    Returns:
        The decoded PIL image when the service answers with status 200.
        Otherwise the response text is printed and None is returned.
    """
    payload = {
        "prompt": prompt,
        "width": width,
        "height": height
    }
    reply = requests.post(url, json=payload)

    # Anything but 200 is reported on stdout; the caller receives None
    if reply.status_code != 200:
        print("Error:", reply.text)
        return None

    # The image travels as a base64 string in the JSON body
    image_bytes = base64.b64decode(reply.json()["image_base64"])
    return Image.open(BytesIO(image_bytes))
84 |         
# Run the FastAPI app
if __name__ == "__main__":
    # The pipeline is created only when this file is executed as a script;
    # the /k31/ handler reads it as the module-level name t2i_pipe.
    t2i_pipe = get_T2I_pipeline(
        device_map, dtype_map,
    )
    # Serve on all interfaces, port 8188
    uvicorn.run(app, host="0.0.0.0", port=8188)
91 | 


--------------------------------------------------------------------------------
/src/llm_utils.py:
--------------------------------------------------------------------------------
  1 | from typing import List, Callable
  2 | from googletrans import Translator
  3 | import random
  4 | 
  5 | from src.prompt_configs import PromptConfig, prefix
  6 | 
  7 | translator = Translator()
  8 | 
  9 | def get_translation(text: str, dest: str = 'en') -> str:
 10 |     return translator.translate(text, dest=dest).text
 11 | 
 12 | def llm_generate_titles(
 13 |     llm_generate: Callable[[str], str], 
 14 |     description: str, 
 15 |     prompt_config: PromptConfig,
 16 | ) -> List[str]:
 17 |     """
 18 |     Generate presentation slide titles using a language model.
 19 | 
 20 |     Args:
 21 |         llm_generate (Callable[[str], str]): Function to generate text using a language model.
 22 |         description (str): Description of the presentation.
 23 |         prompt_config (PromptConfig): Configuration for prompts.
 24 | 
 25 |     Returns:
 26 |         List[str]: List of generated slide titles.
 27 |     """
 28 |     prompt = prompt_config.title_prompt.format(
 29 |         description=description
 30 |     )
 31 |     titles_str = llm_generate(prompt)
 32 |     titles = []
 33 |     for title in titles_str.split("\n"):
 34 |         sep_index = title.index('. ') + 1
 35 |         title = title.strip()[sep_index:]
 36 |         title = title.replace('.', '')
 37 |         title = title.replace('\n', '')
 38 |         if prefix in title.lower():
 39 |             title = title[
 40 |                 title.lower().index(prefix)+len(prefix):
 41 |             ]
 42 |         titles.append(title)
 43 |     return titles
 44 | 
 45 | def llm_generate_text(
 46 |     llm_generate: Callable[[str], str], 
 47 |     description: str, 
 48 |     titles: List[str], 
 49 |     prompt_config: PromptConfig
 50 | ) -> List[str]:
 51 |     """
 52 |     Generate text for each slide title using a language model.
 53 | 
 54 |     Args:
 55 |         llm_generate (Callable[[str], str]): Function to generate text using a language model.
 56 |         description (str): Description of the presentation.
 57 |         titles (List[str]): List of slide titles.
 58 |         prompt_config (PromptConfig): Configuration for prompts.
 59 | 
 60 |     Returns:
 61 |         List[str]: List of generated texts for each slide.
 62 |     """
 63 |     texts = []
 64 |     for title in titles:
 65 |         query = prompt_config.text_prompt.format(description=description, title=title)
 66 |         text = llm_generate(query)
 67 |         if prefix in text.lower():
 68 |             text = text[text.lower().index(prefix)+len(prefix):]
 69 |             text = text.replace('\n', '') 
 70 |         texts.append(text)
 71 |     return texts
 72 | 
 73 | def llm_generate_image_prompt(
 74 |     llm_generate: Callable[[str], str], 
 75 |     description: str, 
 76 |     title: str, 
 77 |     prompt_config: PromptConfig
 78 | ) -> str:
 79 |     """
 80 |     Generate an image prompt for a slide using a language model and translate it.
 81 | 
 82 |     Args:
 83 |         llm_generate (Callable[[str], str]): Function to generate text using a language model.
 84 |         description (str): Description of the presentation.
 85 |         title (str): Slide title.
 86 |         prompt_config (PromptConfig): Configuration for prompts.
 87 | 
 88 |     Returns:
 89 |         str: Translated image prompt.
 90 |     """
 91 |     query = prompt_config.image_prompt.format(description=description, title=title)
 92 |     prompt = llm_generate(query)
 93 |     if prefix in prompt: 
 94 |         prompt = prompt[prompt.lower().index(prompt)+len(prompt):]
 95 |         prompt = prompt.replace('\n', '')
 96 |     return get_translation(prompt)
 97 | 
 98 | def llm_generate_background_prompt(
 99 |     llm_generate: Callable[[str], str], 
100 |     description: str, 
101 |     title: str, 
102 |     prompt_config: PromptConfig, 
103 |     background_style: str = ''
104 | ) -> str:
105 |     """
106 |     Generate a background prompt for a slide using a language model and translate it.
107 | 
108 |     Args:
109 |         llm_generate (Callable[[str], str]): Function to generate text using a language model.
110 |         description (str): Description of the presentation.
111 |         title (str): Slide title.
112 |         prompt_config (PromptConfig): Configuration for prompts.
113 | 
114 |     Returns:
115 |         str: Translated background prompt.
116 |     """
117 |     query = prompt_config.background_prompt.format(description=description, title=title)
118 |     
119 |     keywords = llm_generate(query)
120 |     background_prompt = f'{keywords}, {background_style}'
121 |         
122 |     return get_translation(background_prompt)
123 | 


--------------------------------------------------------------------------------
/src/prompt_configs/__init__.py:
--------------------------------------------------------------------------------
1 | from .prompt_config import PromptConfig, prefix
2 | from .ru_gigachat_config import ru_gigachat_config
3 | from .en_gigachat_config import en_gigachat_config


--------------------------------------------------------------------------------
/src/prompt_configs/en_gigachat_config.py:
--------------------------------------------------------------------------------
  1 | from .prompt_config import PromptConfig, prefix
  2 | 
# English prompt configuration. Every prompt is a template whose
# "{description}" / "{title}" placeholders are filled in with str.format();
# `prefix` is interpolated right here, when the module is loaded.
en_gigachat_config = PromptConfig(
    # Few-shot prompt asking for a numbered list of slide titles.
    # Placeholder: {description}.
    title_prompt = (
        'You are given a presentation description: "{description}". '
        'Based on this description and examples, generate slide titles for the presentation. '
        'The title should be brief, no more than 4 words. '
        'Answer in English only. '
        'Present the response as a numbered list. '
        'Examples:\n '
        'Query: Description of a presentation about marketing strategy for a new product.\n'
        '1. Introduction\n '
        '2. Marketing Goals\n '
        '3. Market Analysis\n '
        '4. Budget\n '
        '5. Conclusion\n '
        'Query: Presentation about company achievements over the past year.\n'
        '1. Welcome\n '
        '2. General Achievements\n '
        '3. Financial Results\n '
        '4. Successful Projects\n '
        '5. Team Development\n '
        '6. Social Initiatives\n '
        '7. Future Plans\n '
        '8. Acknowledgments\n '
        '9. Q&A\n '
        'Query: Presentation about new technologies in manufacturing.\n'
        '1. Introduction\n '
        '2. Current Technologies\n '
        '3. New Developments\n '
        '4. Implementation Examples\n '
        '5. Future Trends\n '
        '6. Conclusion\n '
        '7. Discussion\n '
        'Response:\n'
    ),
    # Prompt asking for one sentence of body text for a slide.
    # Placeholders: {description}, {title}. The reply is asked to start with `prefix`.
    text_prompt = (
        'You are given a presentation description: "{description}". '
        'Write one sentence no more than 20 words for a slide with the title "{title}". '
        'Answer in English only. '
        f'Write only the final text, starting with "{prefix} ". '
        'Examples:\n'
        f'{prefix} The 20% sales increase is attributed to the implementation of the new marketing strategy.\n'
        f'{prefix} Innovative technologies have improved manufacturing efficiency by 30%.\n'
        f'{prefix} New customer engagement approaches have increased satisfaction levels by 15%.\n'
        f'{prefix} This year, the company launched three new products that became market leaders.\n'
        'Response:\n'
    ),
    # Prompt asking for a detailed description of a slide picture.
    # Placeholders: {description}, {title}.
    # NOTE(review): the instruction hard-codes "Description: " while the
    # examples start with `prefix` - confirm the two agree.
    image_prompt = (
        'You are given a presentation description: "{description}". '
        'Generate a detailed description of an aesthetic image for a slide with the title: "{title}". '
        'The description should be long and highly detailed, covering all aspects of the visual elements. '
        'Exclude numerical values, text, graphs, company names, and similar content. '
        'Avoid using text on the image. '
        'Answer in English only. '
        'Make it visually pleasing and contextually appropriate. '
        'Start with the word "Description: ". '
        'Examples:\n'
        f'{prefix} A spacious conference room with a modern design, glass walls letting in plenty of natural light, a long wooden table in the center with laptops and documents, business people in formal attire sitting around, and a cityscape visible through the windows.\n'
        f'{prefix} A forest trail surrounded by tall trees with green leaves, fallen leaves on the ground, sunlight filtering through the foliage creating a play of light and shadow, animal tracks visible on the path, and the distant sound of a river.\n'
        f'{prefix} A busy street in the city center, with high modern buildings featuring glass facades on both sides, many pedestrians walking, some rushing and others strolling, cars and buses moving along the street, and a clear sky with a few clouds.\n'
        f'{prefix} A cozy café with wooden tables and soft chairs, paintings of nature on the walls, large windows letting in plenty of light, patrons sitting at tables, some working on laptops and others chatting over coffee, and a counter with desserts and beverages.\n'
        'Response:\n'
    ),
    # Prompt asking for 4 comma-separated keywords for a slide background.
    # Placeholders: {description}, {title}. The examples start with `prefix`.
    background_prompt = (
        'Based on the presentation description: "{description}" '
        'and the current slide title: "{title}". '
        'Use in-context learning to generate 4 key words related to the content of the slide. '
        'Write the key words separated by commas. '
        'Examples:\n'
        'Input: Presentation about the latest trends in digital marketing.\n'
        'Title: Emerging Technologies\n'
        f'{prefix} innovation, digital, trends, technology\n\n'
        'Input: Presentation on strategies for improving customer service.\n'
        'Title: Enhancing Engagement\n'
        f'{prefix} customer, engagement, strategies, improvement\n\n'
        'Input: Presentation on the impact of climate change on agriculture.\n'
        'Title: Environmental Challenges\n'
        f'{prefix} climate, agriculture, impact, sustainability\n\n'
        'Input: Presentation on the benefits of remote work for productivity.\n'
        'Title: Work Efficiency\n'
        f'{prefix} remote, productivity, benefits, efficiency\n'
        'Response:\n'
    ),
    # Must be a list of strings. Each entry is written as a parenthesised
    # pair of adjacent literals, which Python joins into ONE string.
    # NOTE(review): two entries start with 'Nature-inspired.' - confirm
    # that the extra weight for this style is intended.
    background_styles = [
        (
            'Gradient. WITHOUT TEXT, Vectors style, '
            'Gradient dip, More game with colors, Smooth transition. '
        ),
        (
            'Abstract. Clean lines, Modern feel, '
            'Minimalistic, Soft colors, Elegant look. '
        ),
        (
            'Nature-inspired. Soft green tones, '
            'Earthy feel, Natural textures, Organic look. '
        ),
        (
            'Technology. Futuristic design, Blue tones, '
            'Circuit patterns, Sleek lines, High-tech feel. '
        ),
        (
            'Corporate. Professional look, Subtle gradients, '
            'Clean and polished, Neutral colors, Business-oriented. '
        ),
        (
            'Retro. Bold colors, Geometric shapes, '
            'Vintage feel, Nostalgic design, Playful patterns. '
        ),
        (
            'Minimalist. White space, Simple shapes, '
            'Clean and clear, Monochrome tones, Modern elegance. '
        ),
        (
            'Art Deco. Rich textures, Metallic accents, '
            'Geometric patterns, Glamorous style, 1920s influence. '
        ),
        (
            'Urban. Graffiti art, Vibrant colors, '
            'Street style, Dynamic patterns, Energetic vibe. '
        ),
        (
            'Watercolor. Soft brush strokes, Blended hues, '
            'Artistic feel, Fluid shapes, Subtle transitions. '
        ),
        (
            'Dark Mode. Deep black tones, Subtle contrasts, '
            'Sophisticated look, Modern design, High contrast elements. '
        ),
        (
            'Elegant. Rich colors, Decorative patterns, '
            'Luxurious textures, Classic style, Refined details. '
        ),
        (
            'Nature-inspired. Earthy colors, Leaf patterns, '
            'Wood textures, Tranquil feel, Organic shapes. '
        ),
        (
            'Dynamic. Bold contrasts, Energetic lines, '
            'Motion feel, Vibrant colors, Modern design. '
        )
    ]
)


--------------------------------------------------------------------------------
/src/prompt_configs/prompt_config.py:
--------------------------------------------------------------------------------
 1 | from typing import List
 2 | 
 3 | prefix = "prompt: "
 4 | 
 5 | class PromptConfig:
 6 |     def __init__(
 7 |         self, 
 8 |         title_prompt: str, 
 9 |         text_prompt: str, 
10 |         image_prompt: str, 
11 |         background_prompt: str,
12 |         background_styles: List[str],
13 |     ):
14 |         self.title_prompt = title_prompt
15 |         self.text_prompt = text_prompt
16 |         self.image_prompt = image_prompt
17 |         self.background_prompt = background_prompt
18 |         self.background_styles = background_styles


--------------------------------------------------------------------------------
/src/prompt_configs/ru_gigachat_config.py:
--------------------------------------------------------------------------------
  1 | from .prompt_config import PromptConfig, prefix
  2 | 
# Russian-language prompt templates bundled into a PromptConfig.
#
# Each template is built from adjacent string literals that Python joins into
# one string. Only `prefix` is interpolated here (by the f-string pieces);
# "{description}" and "{title}" sit in plain (non-f) literals and therefore
# stay as literal placeholders -- presumably filled in later by the caller,
# e.g. via str.format; verify against the code that consumes this config.
ru_gigachat_config = PromptConfig(
    # Asks for a numbered list of short slide titles (at most 4 words each),
    # followed by three worked examples.
    title_prompt = (
        'тебе дано описание презентации: "{description}". '
        'На основе данного описания и примеров, сгенерируй заголовки слайдов презентации. '
        'Заголовок должен быть коротким, не более 4 слов. '
        'Представь ответ в виде пронумерованного списка. '
        'Примеры:\n '
        'Запрос: Описание презентации о стратегии маркетинга для нового продукта.\n'
        '1. Введение\n '
        '2. Цели маркетинга\n '
        '3. Анализ рынка\n '
        '4. Бюджет\n '
        '5. Заключение\n '
        'Запрос: Презентация о достижениях компании за прошлый год.\n'
        '1. Приветствие\n '
        '2. Общие достижения\n '
        '3. Финансовые результаты\n '
        '4. Успешные проекты\n '
        '5. Развитие команды\n '
        '6. Социальные инициативы\n '
        '7. Планы на будущее\n '
        '8. Благодарности\n '
        '9. Вопросы и ответы\n '
        'Запрос: Презентация о новых технологиях в производстве.\n'
        '1. Введение в тему\n '
        '2. Текущие технологии\n '
        '3. Новые разработки\n '
        '4. Примеры внедрения\n '
        '5. Будущие тенденции\n '
        '6. Заключение\n '
        '7. Дискуссия\n '
        'Ответ:\n'
    ),
    # Asks for one sentence (at most 20 words) for the slide titled "{title}";
    # the answer is requested to start with `prefix`.
    # NOTE(review): `prefix` already ends with a space, so '"{prefix} "'
    # renders with two spaces before the closing quote -- confirm intended.
    text_prompt = (
        'тебе дано описание презентации: "{description}". '
        'Напиши одно предложение не более 20 слов для слайда с заголовком "{title}". '
        f'Напиши только итоговый текст, начинай с "{prefix} ". '
        'Примеры:\n'
        f'{prefix} Увеличение продаж на 20% связано с внедрением новой маркетинговой стратегии.\n'
        f'{prefix} Инновационные технологии помогли повысить эффективность производства на 30%.\n'
        f'{prefix} Новые подходы к работе с клиентами увеличили уровень удовлетворенности на 15%.\n'
        f'{prefix} В этом году компания запустила три новых продукта, которые стали лидерами на рынке.\n'
        'Ответ:\n'
    ),
    # Asks for a long, detailed, text-free picture description for the slide.
    # NOTE(review): the instruction tells the model to start with
    # "описание: ", while every example below starts with `prefix`
    # ("prompt: ") -- the two disagree; confirm which marker the response
    # parser expects.
    image_prompt = (
        'тебе дано описание презентации: "{description}". '
        'Придумай детализированное описание эстетичной картинки для слайда с заголовком: "{title}". '
        'Описание должно быть длинным и супер детализированным, включающим все аспекты визуальной составляющей. '
        'Исключи цифровые значения, текст, графики, названия компаний и тому подобное. '
        'Избегай использования текста на изображении. '
        'Сделай его визуально приятным и подходящим контексту. '
        'Начни со слова "описание: ". '
        'Примеры:\n'
        f'{prefix} Просторный зал заседаний с современным дизайном, стеклянные стены пропускают много естественного света, в центре длинный деревянный стол с ноутбуками и документами, вокруг сидят деловые люди в официальной одежде, на заднем плане видна городская панорама через окна.\n'
        f'{prefix} Лесная тропа, окруженная высокими деревьями с зелеными листьями, на земле опавшая листва, солнечные лучи пробиваются сквозь листву, создавая игру света и теней, на тропе видны следы животных, вдали слышен шум реки.\n'
        f'{prefix} Оживленная улица в центре города, по обе стороны высокие современные здания со стеклянными фасадами, на улице много прохожих, некоторые спешат, другие медленно прогуливаются, между ними едут автомобили и автобусы, небо ясное с редкими облаками.\n'
        f'{prefix} Уютное кафе с деревянными столами и мягкими креслами, на стенах висят картины с изображением природы, большие окна пропускают много света, за столами сидят посетители, некоторые работают за ноутбуками, другие беседуют за чашкой кофе, на стойке видны десерты и напитки.\n'
        'Ответ:\n'
    ),
    # Asks for 4 comma-separated key words for the current slide.
    # NOTE(review): unlike the other templates, {description} is not wrapped
    # in double quotes here and the examples carry no `prefix` -- confirm
    # this is intended.
    background_prompt = (
        'На основании описания презентации: {description} '
        'и заголовка текущего слайда: "{title}". '
        'Используй in-context learning для генерации 4 ключевых слов. '
        'Напиши их через запятую. '
        'Примеры:\n'
        'инновации, рост, технологии, успех\n'
        'экология, устойчивость, природа, будущее\n'
        'развитие, обучение, достижения, цели\n'
        'ответственность, сообщество, проекты, партнерство\n'
        'Ответ:\n'
    ), 
    # Must be a list of strings: each parenthesised group below is two
    # adjacent literals that Python concatenates into a single str (the
    # parentheses do not create tuples). The style texts are in English.
    # NOTE(review): 'Nature-inspired' appears twice with different details.
    background_styles = [
        (
            'Gradient. WITHOUT TEXT, Vectors style, '
            'Gradient dip, More game with colors, Smooth transition. '
        ),
        (
            'Abstract. Clean lines, Modern feel, '
            'Minimalistic, Soft colors, Elegant look. '
        ),
        (
            'Nature-inspired. Soft green tones, '
            'Earthy feel, Natural textures, Organic look. '
        ),
        (
            'Technology. Futuristic design, Blue tones, '
            'Circuit patterns, Sleek lines, High-tech feel. '
        ),
        (
            'Corporate. Professional look, Subtle gradients, '
            'Clean and polished, Neutral colors, Business-oriented. '
        ),
        (
            'Retro. Bold colors, Geometric shapes, '
            'Vintage feel, Nostalgic design, Playful patterns. '
        ),
        (
            'Minimalist. White space, Simple shapes, '
            'Clean and clear, Monochrome tones, Modern elegance. '
        ),
        (
            'Art Deco. Rich textures, Metallic accents, '
            'Geometric patterns, Glamorous style, 1920s influence. '
        ),
        (
            'Urban. Graffiti art, Vibrant colors, '
            'Street style, Dynamic patterns, Energetic vibe. '
        ),
        (
            'Watercolor. Soft brush strokes, Blended hues, '
            'Artistic feel, Fluid shapes, Subtle transitions. '
        ),
        (
            'Dark Mode. Deep black tones, Subtle contrasts, '
            'Sophisticated look, Modern design, High contrast elements. '
        ),
        (
            'Elegant. Rich colors, Decorative patterns, '
            'Luxurious textures, Classic style, Refined details. '
        ),
        (
            'Nature-inspired. Earthy colors, Leaf patterns, '
            'Wood textures, Tranquil feel, Organic shapes. '
        ),
        (
            'Dynamic. Bold contrasts, Energetic lines, '
            'Motion feel, Vibrant colors, Modern design. '
        )
    ]
)


--------------------------------------------------------------------------------
/src/slides/__init__.py:
--------------------------------------------------------------------------------
1 | from .image_slide import generate_image_slide
2 | from .plain_text_slide import generate_plain_text_slide
3 | from .title_slide import generate_title_slide
4 | from .generate_slide import generate_slide


--------------------------------------------------------------------------------
/src/slides/generate_slide.py:
--------------------------------------------------------------------------------
 1 | from pptx import Presentation
 2 | from pptx.util import Inches
 3 | from pptx.oxml.xmlchemy import OxmlElement
 4 | from pptx.dml.color import RGBColor
 5 | from pptx.enum.text import PP_ALIGN, MSO_ANCHOR, MSO_AUTO_SIZE
 6 | 
 7 | from typing import List, Callable, Optional
 8 | from PIL import Image
 9 | import random
10 | import tqdm
11 | import os
12 | 
13 | 
14 | from .image_slide import generate_image_slide
15 | from .plain_text_slide import generate_plain_text_slide
16 | from .title_slide import generate_title_slide
17 | 
18 | from src.font import Font
19 | 
def generate_slide(
    presentation: Presentation,
    title: str,
    text: Optional[str] = None,
    background_path: Optional[str] = None,
    picture_path: Optional[str] = None,
    font: Optional[Font] = None,
    text_font_coeff: float = 0.6,
) -> None:
    """
    Add one slide to the presentation, picking the layout from the arguments.

    Exactly three argument combinations are supported:

    * title + background, no text, no picture  -> title slide
    * title + text + background, no picture    -> plain text slide
    * title + text + picture, no background    -> image slide

    Any other combination (empty title, both or neither of
    ``background_path`` / ``picture_path``, empty ``text`` ...) adds no slide;
    a ``RuntimeWarning`` is emitted so the omission does not go unnoticed.

    Args:
        presentation (Presentation): The presentation object.
        title (str): The title of the slide.
        text (Optional[str]): The text content for the slide (default is None).
        background_path (Optional[str]): The path to the background image for the slide (default is None).
        picture_path (Optional[str]): The path to the picture for the slide (default is None).
        font (Font): Font object to manage font styles and paths.
        text_font_coeff (float): Coefficient to adjust the font size
            of the text relative to the title (default is 0.6).
    """
    # Local import: this block must not rely on a module-level import.
    import warnings

    if title and text is None and picture_path is None and background_path:
        generate_title_slide(
            presentation=presentation,
            title=title,
            font=font,
            background_path=background_path,
        )
    elif title and text and background_path and picture_path is None:
        generate_plain_text_slide(
            presentation=presentation,
            title=title,
            text=text,
            background_path=background_path,
            font=font,
            text_font_coeff=text_font_coeff,
        )
    elif title and text and picture_path and background_path is None:
        generate_image_slide(
            presentation=presentation,
            title=title,
            text=text,
            picture_path=picture_path,
            font=font,
            text_font_coeff=text_font_coeff,
        )
    else:
        # Previously this case was a silent no-op; keep it non-fatal but
        # make it visible.
        warnings.warn(
            "generate_slide: unsupported combination of arguments "
            f"(title={bool(title)}, text={text is not None}, "
            f"background_path={background_path is not None}, "
            f"picture_path={picture_path is not None}); no slide was added",
            RuntimeWarning,
            stacklevel=2,
        )


--------------------------------------------------------------------------------
/src/slides/image_slide.py:
--------------------------------------------------------------------------------
  1 | from pptx import Presentation
  2 | from pptx.util import Inches
  3 | from pptx.oxml.xmlchemy import OxmlElement
  4 | from pptx.dml.color import RGBColor
  5 | from pptx.enum.text import PP_ALIGN, MSO_ANCHOR, MSO_AUTO_SIZE
  6 | 
  7 | from typing import List, Callable, Optional
  8 | from PIL import Image
  9 | import random
 10 | import tqdm
 11 | import os
 12 | 
 13 | from src.font import Font
 14 | from .slide_utils import add_paragraph
 15 | 
 16 | def generate_text_title_image_right(
 17 |     presentation: Presentation,
 18 |     title: str,
 19 |     text: str,
 20 |     picture_path: str,
 21 |     font:Font, 
 22 |     text_font_coeff:float=0.6,
 23 | ) -> None:
 24 |     """
 25 |     Add a slide with title, text on the left, and picture on the right.
 26 | 
 27 |     Args:
 28 |     presentation (Presentation): PowerPoint presentation object
 29 |     title (str): Title for the slide
 30 |     text (str): Text content for the left side of the slide
 31 |     picture_path (str): Path to the picture to be inserted on the right side
 32 |     font (Font): Font object to manage font styles and paths.
 33 |     text_font_coeff (float): Coefficient to adjust the font size of the text relative to the title (default is 0.6).
 34 |     Returns:
 35 |     None
 36 |     """
 37 | 
 38 |     slide_layout = presentation.slide_layouts[6]
 39 |     slide = presentation.slides.add_slide(slide_layout)
 40 | 
 41 |     slide_height = 9
 42 |     slide_width = 16
 43 |     margin = min(slide_height, slide_width) / 18
 44 | 
 45 |     # image params
 46 |     # original image size
 47 |     x_pixels, y_pixels = Image.open(picture_path).size
 48 |     assert x_pixels == y_pixels or x_pixels < y_pixels, \
 49 |         'only vertical and square images can be used'
 50 |     # we need image height to be equal to slide height
 51 |     image_height = slide_height
 52 |     # x_pixels / y_pixels = image_width / image_height
 53 |     image_width = x_pixels / y_pixels * image_height
 54 |     image_left = slide_width - image_width
 55 |     image_top = 0
 56 | 
 57 |     slide.shapes.add_picture(
 58 |         picture_path,
 59 |         left=Inches(image_left),
 60 |         top=Inches(image_top),
 61 |         width=Inches(image_width),
 62 |         height=Inches(image_height),
 63 |     )
 64 | 
 65 |     # title params
 66 |     title_left = margin
 67 |     title_top = margin
 68 |     title_width = slide_width - image_width - 2*margin
 69 |     title_height = slide_height / 6
 70 | 
 71 |     title_box = slide.shapes.add_textbox(
 72 |         left=Inches(title_left),
 73 |         top=Inches(title_top),
 74 |         width=Inches(title_width),
 75 |         height=Inches(title_height),
 76 |     )
 77 |     title_frame = title_box.text_frame
 78 |     title_frame.clear()
 79 |     
 80 |     title_frame.vertical_anchor = MSO_ANCHOR.MIDDLE
 81 |     title_frame.auto_size = MSO_AUTO_SIZE.TEXT_TO_FIT_SHAPE
 82 |     title_frame.word_wrap = False
 83 |     
 84 |     # title_paragraph = title_frame.add_paragraph()
 85 |     title_paragraph = add_paragraph(title_frame)
 86 |     title_paragraph.alignment = PP_ALIGN.CENTER
 87 |     title_paragraph.text = title
 88 | 
 89 |     for max_size in range(font.max_size)[::-5]: 
 90 |         try: 
 91 |             title_frame.fit_text(
 92 |                 font_file=font.bold, 
 93 |                 max_size=max_size,
 94 |                 bold=True,
 95 |             )
 96 |             break
 97 |         except: 
 98 |             pass
 99 | 
100 |     # text params
101 |     title_left = margin
102 |     text_top = title_height + margin*2
103 |     text_width = slide_width - image_width - 2*margin
104 |     text_height = slide_height - title_height - 3*margin
105 | 
106 |     text_box = slide.shapes.add_textbox(
107 |         left=Inches(title_left),
108 |         top=Inches(text_top),
109 |         width=Inches(text_width),
110 |         height=Inches(text_height),
111 |     )
112 |     text_frame = text_box.text_frame
113 |     text_frame.clear()
114 |     
115 |     text_frame.vertical_anchor = MSO_ANCHOR.MIDDLE
116 |     text_frame.auto_size = MSO_AUTO_SIZE.TEXT_TO_FIT_SHAPE
117 |     text_frame.word_wrap = False
118 |     
119 |     # text_paragraph = text_frame.add_paragraph()
120 |     text_paragraph = add_paragraph(text_frame)
121 |     text_paragraph.text = text 
122 |     text_paragraph.alignment = PP_ALIGN.CENTER
123 | 
124 |     for max_size in range(int(max_size*text_font_coeff))[::-5]:
125 |         try: 
126 |             text_frame.fit_text(font_file=font.basic, max_size=max_size)
127 |             break
128 |         except: 
129 |             pass
130 |             
131 |     
132 | def generate_text_title_image_left(
133 |     presentation: Presentation,
134 |     title: str,
135 |     text: str,
136 |     picture_path: str,
137 |     font:Font,
138 |     text_font_coeff:float=0.6,
139 | ) -> None:
140 |     """
141 |     Add a slide with title, text on the left, and picture on the right.
142 | 
143 |     Args:
144 |         presentation (Presentation): PowerPoint presentation object
145 |         title (str): Title for the slide
146 |         text (str): Text content for the left side of the slide
147 |         picture_path (str): Path to the picture to be inserted on the right side
148 |         font (Font): Font object to manage font styles and paths.
149 |         text_font_coeff (float): Coefficient to adjust the font 
150 |             size of the text relative to the title (default is 0.6).
151 |     
152 |     Returns:
153 |         None
154 |     """
155 | 
156 |     slide_layout = presentation.slide_layouts[6]
157 |     slide = presentation.slides.add_slide(slide_layout)
158 | 
159 |     slide_height = 9
160 |     slide_width = 16
161 |     margin = min(slide_height, slide_width) / 18
162 | 
163 |     # image params
164 |     # original image size
165 |     x_pixels, y_pixels = Image.open(picture_path).size
166 |     assert x_pixels == y_pixels or x_pixels < y_pixels, \
167 |         'only vertical and square images can be used'
168 |     # we need image height to be equal to slide height
169 |     image_height = slide_height
170 |     # x_pixels / y_pixels = image_width / image_height
171 |     image_width = x_pixels / y_pixels * image_height
172 |     image_left = 0
173 |     image_top = 0
174 | 
175 |     slide.shapes.add_picture(
176 |         picture_path,
177 |         left=Inches(image_left),
178 |         top=Inches(image_top),
179 |         width=Inches(image_width),
180 |         height=Inches(image_height),
181 |     )
182 |     
183 |     # title params
184 |     title_left = image_width + margin 
185 |     title_top = margin
186 |     title_width = slide_width - image_width - 2 * margin
187 |     title_height = slide_height / 6
188 | 
189 |     title_box = slide.shapes.add_textbox(
190 |         left=Inches(title_left),
191 |         top=Inches(title_top),
192 |         width=Inches(title_width),
193 |         height=Inches(title_height),
194 |     )
195 |     title_frame = title_box.text_frame
196 |     title_frame.clear()
197 | 
198 |     title_frame.vertical_anchor = MSO_ANCHOR.MIDDLE
199 |     title_frame.auto_size = MSO_AUTO_SIZE.TEXT_TO_FIT_SHAPE
200 |     title_frame.word_wrap = False
201 |     
202 |     # title_paragraph = title_frame.add_paragraph()
203 |     title_paragraph = add_paragraph(title_frame)
204 |     title_paragraph.text = title
205 |     title_paragraph.alignment = PP_ALIGN.CENTER
206 | 
207 |     for max_size in range(font.max_size)[::-5]: 
208 |         try: 
209 |             title_frame.fit_text(
210 |                 font_file=font.bold, 
211 |                 max_size=max_size,
212 |                 bold=True,
213 |             )
214 |             break
215 |         except: 
216 |             pass
217 |             
218 |     # text params
219 |     text_left = title_left
220 |     text_top = title_height + margin * 2
221 |     text_width = slide_width - image_width - 2 * margin
222 |     text_height = slide_height - title_height - 3 * margin
223 | 
224 |     text_box = slide.shapes.add_textbox(
225 |         left=Inches(text_left),
226 |         top=Inches(text_top),
227 |         width=Inches(text_width),
228 |         height=Inches(text_height),
229 |     )
230 |     text_frame = text_box.text_frame
231 |     text_frame.vertical_anchor = MSO_ANCHOR.MIDDLE
232 |     text_frame.auto_size = MSO_AUTO_SIZE.TEXT_TO_FIT_SHAPE
233 |     text_frame.word_wrap = False
234 |     
235 |     # text_paragraph = text_frame.add_paragraph()
236 |     text_paragraph = add_paragraph(text_frame)
237 |     text_paragraph.text = text
238 |     text_paragraph.alignment = PP_ALIGN.CENTER
239 |     
240 |     for max_size in range(int(max_size*text_font_coeff))[::-5]:
241 |         try: 
242 |             text_frame.fit_text(
243 |                 font_file=font.basic, 
244 |                 max_size=max_size
245 |             )
246 |             break
247 |         except: 
248 |             pass
249 |            
250 | 
def generate_image_slide(
    presentation: Presentation, 
    title: str, 
    text: str, 
    picture_path: str,
    font: Font,
    text_font_coeff: float = 0.6,
) -> None:
    """
    Add a title + text + picture slide, with the picture side picked at random.

    One of the two layout builders (picture on the right, picture on the
    left) is selected with ``random.choice`` and called with the arguments
    passed through unchanged.

    Args:
        presentation (Presentation): PowerPoint presentation object.
        title (str): Title for the slide.
        text (str): Text content for the slide.
        picture_path (str): Path to the picture to be inserted in the slide.
        font (Font): Font object to manage font styles and paths.
        text_font_coeff (float, optional): Coefficient to adjust the font size of the text 
                                           relative to the title (default is 0.6).

    Returns:
        None
    """
    layout_builders = (
        generate_text_title_image_right,
        generate_text_title_image_left,
    )
    build_slide = random.choice(layout_builders)

    slide_arguments = dict(
        presentation=presentation,
        title=title,
        text=text,
        picture_path=picture_path,
        font=font,
        text_font_coeff=text_font_coeff,
    )
    build_slide(**slide_arguments)


--------------------------------------------------------------------------------
/src/slides/plain_text_slide.py:
--------------------------------------------------------------------------------
  1 | from pptx import Presentation
  2 | from pptx.util import Inches
  3 | from pptx.oxml.xmlchemy import OxmlElement
  4 | from pptx.dml.color import RGBColor
  5 | from pptx.enum.text import PP_ALIGN, MSO_ANCHOR, MSO_AUTO_SIZE
  6 | 
  7 | from typing import List, Callable, Optional
  8 | from PIL import Image
  9 | import random
 10 | import tqdm
 11 | import os
 12 | 
 13 | 
 14 | 
 15 | from src.font import Font
 16 | from .slide_utils import set_shape_transparency, add_paragraph
 17 | 
 18 | def generate_plain_text_slide(
 19 |     presentation: Presentation,
 20 |     title: str,
 21 |     text: str,
 22 |     font:Font,
 23 |     background_path: str = None,
 24 |     text_font_coeff:float=0.6,
 25 | ) -> None:
 26 |     """
 27 |     Add a slide with title, text placeholders on the blurred background image.
 28 | 
 29 |     Args:
 30 |         presentation (Presentation): PowerPoint presentation object
 31 |         title (str): Title for the slide
 32 |         text (str): Text content for the slide
 33 |         background_path (str): Path to the background image for the slide
 34 |         font (Font): Font object to manage font styles and paths.
 35 |         text_font_coeff (float): Coefficient to adjust the font size of the text relative to the title (default is 0.6).
 36 |     Returns:
 37 |         None
 38 |     """
 39 | 
 40 |     slide_layout = presentation.slide_layouts[6]
 41 |     slide = presentation.slides.add_slide(slide_layout)
 42 | 
 43 |     slide_height = 9
 44 |     slide_width = 16
 45 |     margin = min(slide_height, slide_width) / 18
 46 | 
 47 |     # Background image
 48 |     if background_path:
 49 |         pic = slide.shapes.add_picture(
 50 |             background_path, 0, 0,
 51 |             width=presentation.slide_width,
 52 |             height=presentation.slide_height
 53 |         )
 54 |         # This moves it to the background
 55 |         slide.shapes._spTree.remove(pic._element)
 56 |         slide.shapes._spTree.insert(2, pic._element)
 57 | 
 58 |     # Title placeholder
 59 |     title_left = margin
 60 |     title_top = margin
 61 |     title_width = slide_width - 2 * margin
 62 |     title_height = slide_height / 6
 63 | 
 64 |     title_box = slide.shapes.add_textbox(
 65 |         left=Inches(title_left),
 66 |         top=Inches(title_top),
 67 |         width=Inches(title_width),
 68 |         height=Inches(title_height),
 69 |     )
 70 |     title_frame = title_box.text_frame
 71 |     title_frame.clear()
 72 |     
 73 |     title_frame.word_wrap = False
 74 |     title_frame.vertical_anchor = MSO_ANCHOR.MIDDLE
 75 |     title_frame.auto_size = MSO_AUTO_SIZE.TEXT_TO_FIT_SHAPE
 76 |     
 77 |     title_paragraph = add_paragraph(title_frame)
 78 |     title_paragraph.alignment = PP_ALIGN.CENTER
 79 |     title_paragraph.text = title
 80 | 
 81 |     for max_size in range(font.max_size)[::-5]: 
 82 |         try: 
 83 |             title_frame.fit_text(
 84 |                 font_file=font.bold, 
 85 |                 max_size=max_size, 
 86 |                 bold=True,
 87 |             )
 88 |             break
 89 |         except: 
 90 |             pass
 91 |             
 92 |     # settings white color and transparency to title shape
 93 |     title_fill = title_box.fill
 94 |     title_fill.solid()
 95 |     title_fill.fore_color.rgb = RGBColor(255, 255, 255)
 96 |     set_shape_transparency(title_box, 0.5)
 97 | 
 98 |     # Text placeholder
 99 |     text_left = Inches(margin)
100 |     text_top = Inches(title_height + margin * 2)
101 |     text_width = Inches(slide_width - 2 * margin)
102 |     text_height = Inches(slide_height - title_height - 3 * margin)
103 |     text_box = slide.shapes.add_textbox(
104 |         left=text_left,
105 |         top=text_top,
106 |         width=text_width,
107 |         height=text_height
108 |     )
109 |     text_frame = text_box.text_frame
110 |     text_frame.clear()
111 |     
112 |     text_frame.word_wrap = False
113 |     text_frame.vertical_anchor = MSO_ANCHOR.MIDDLE
114 |     text_frame.auto_size = MSO_AUTO_SIZE.TEXT_TO_FIT_SHAPE
115 |     
116 |     text_paragraph = add_paragraph(text_frame)
117 |     text_paragraph.text = text 
118 |     text_paragraph.alignment = PP_ALIGN.CENTER
119 | 
120 |     for max_size in range(int(max_size*text_font_coeff))[::-5]: 
121 |         try: 
122 |             text_frame.fit_text(
123 |                 font_file=font.basic, 
124 |                 max_size=max_size
125 |             )
126 |             break
127 |         except: 
128 |             pass
129 |             
130 |     # Setting text box fill to white with 80% transparency
131 |     text_fill = text_box.fill
132 |     text_fill.solid()
133 |     text_fill.fore_color.rgb = RGBColor(255, 255, 255)
134 |     set_shape_transparency(text_box, 0.5)
135 |     


--------------------------------------------------------------------------------
/src/slides/slide_utils.py:
--------------------------------------------------------------------------------
 1 | from pptx import Presentation
 2 | from pptx.util import Inches
 3 | from pptx.oxml.xmlchemy import OxmlElement
 4 | from pptx.dml.color import RGBColor
 5 | from pptx.enum.text import PP_ALIGN, MSO_ANCHOR, MSO_AUTO_SIZE
 6 | 
 7 | import random
 8 | import os
 9 | from PIL import Image
10 | from typing import List, Callable
11 | 
12 | import tqdm
13 | 
def add_paragraph(text_frame):
    """Return the first paragraph of ``text_frame``, adding one if it has none.

    Args:
        text_frame: Object exposing an indexable ``paragraphs`` attribute and
            an ``add_paragraph()`` method.

    Returns:
        ``text_frame.paragraphs[0]`` when it exists, otherwise the paragraph
        returned by ``text_frame.add_paragraph()``.
    """
    try:
        paragraph = text_frame.paragraphs[0]
    except IndexError:
        # Only "no paragraphs yet" triggers the fallback; any other error
        # (including KeyboardInterrupt) propagates instead of being masked.
        paragraph = text_frame.add_paragraph()
    return paragraph
20 |     
def SubElement(parent, tagname, **kwargs):
    """Create a ``tagname`` element, set ``kwargs`` as its attributes, append it to ``parent``.

    Returns:
        The newly created child element.
    """
    child = OxmlElement(tagname)
    for attr_name, attr_value in kwargs.items():
        child.attrib[attr_name] = attr_value
    parent.append(child)
    return child
26 | 
def set_shape_transparency(shape, alpha):
    """Append an ``a:alpha`` child to the sRGB colour of the shape's solid fill.

    ``alpha`` is multiplied by 100000 and truncated to an int before being
    written as the ``val`` attribute (e.g. 0.5 -> "50000"). The shape's fill
    is expected to be a solid fill already.
    """
    solid_fill = shape.fill._xPr.solidFill
    srgb_colour = solid_fill.get_or_change_to_srgbClr()
    scaled_alpha = str(int(alpha * 100000))
    SubElement(srgb_colour, 'a:alpha', val=scaled_alpha)


--------------------------------------------------------------------------------
/src/slides/title_slide.py:
--------------------------------------------------------------------------------
 1 | from pptx import Presentation
 2 | from pptx.util import Inches
 3 | from pptx.oxml.xmlchemy import OxmlElement
 4 | from pptx.dml.color import RGBColor
 5 | from pptx.enum.text import PP_ALIGN, MSO_ANCHOR, MSO_AUTO_SIZE
 6 | 
 7 | import random
 8 | import os
 9 | from PIL import Image
10 | from typing import List, Callable
11 | 
12 | import tqdm
13 | 
14 | from .slide_utils import set_shape_transparency, add_paragraph
15 | 
16 | from src.font import Font
17 | 
18 | 
def generate_title_slide(
    presentation: Presentation,
    title: str,
    font:Font, 
    background_path: str = None, 
) -> None:
    """
    Add a slide whose only content is a large title box over an optional background image.

    The title box covers the slide minus a margin on every side (layout is
    computed for a 16 x 9 inch slide, hard-coded below) and is filled white
    at alpha 0.5.

    Args:
        presentation (Presentation): PowerPoint presentation object
        title (str): Title for the slide
        font (Font): Font object to manage font styles and paths.
        background_path (str): Path to the background image for the slide;
            no picture is added when it is empty or None.
    Returns:
        None
    """

    layout = presentation.slide_layouts[6]
    slide = presentation.slides.add_slide(layout)

    slide_height, slide_width = 9, 16
    margin = min(slide_height, slide_width) / 18

    # Optional background picture, sized from the presentation's dimensions
    if background_path:
        backdrop = slide.shapes.add_picture(
            background_path, 0, 0,
            width=presentation.slide_width,
            height=presentation.slide_height
        )
        # Re-insert the picture element at index 2 of the shape tree so it
        # sits behind the shapes added afterwards
        shape_tree = slide.shapes._spTree
        shape_tree.remove(backdrop._element)
        shape_tree.insert(2, backdrop._element)

    # Title box geometry (inches): the whole slide inset by the margin
    box_width = slide_width - 2 * margin
    box_height = slide_height - 2 * margin

    title_box = slide.shapes.add_textbox(
        left=Inches(margin),
        top=Inches(margin),
        width=Inches(box_width),
        height=Inches(box_height),
    )
    frame = title_box.text_frame
    frame.clear()

    frame.vertical_anchor = MSO_ANCHOR.MIDDLE
    frame.auto_size = MSO_AUTO_SIZE.TEXT_TO_FIT_SHAPE
    frame.word_wrap = False

    paragraph = add_paragraph(frame)
    paragraph.alignment = PP_ALIGN.CENTER
    paragraph.text = title

    # Try sizes font.max_size - 1, font.max_size - 6, ... and stop at the
    # first one for which fit_text does not raise TypeError
    for candidate_size in range(font.max_size - 1, -1, -5):
        try:
            frame.fit_text(
                font_file=font.bold,
                max_size=candidate_size,
                bold=True,
            )
        except TypeError:
            continue
        break

    # White fill at alpha 0.5 for the title box
    fill = title_box.fill
    fill.solid()
    fill.fore_color.rgb = RGBColor(255, 255, 255)
    set_shape_transparency(title_box, 0.5)


--------------------------------------------------------------------------------