├── requirements.txt ├── README.md ├── LICENSE └── SDXL-Dynamic-Image-Generator.py /requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate==0.30.1 2 | diffusers==0.28.0 3 | huggingface-hub==0.23.2 4 | pillow==10.3.0 5 | torch==2.3.0 6 | transformers==4.41.2 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Creates interpolating imagery based on your text using Stable Diffusion XL Turbo 2 | 3 | https://github.com/user-attachments/assets/dc2efd91-7513-4fd9-a00f-d3450c9f2114 4 | 5 | https://github.com/user-attachments/assets/9e06a5e5-17d0-42bd-9954-979dc6fef14f 6 | 7 | https://github.com/user-attachments/assets/522a59e6-5eb5-424f-9183-e8b500bfa6c1 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Drew00785 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
# NOTE: in this repository dump the line that starts the script also carries
# the tail of /LICENSE. It is preserved verbatim below so nothing is lost:
#
#   IN NO EVENT SHALL THE
#   AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#   OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
#   SOFTWARE.
#
# --------------------------------------------------------------------------
# /SDXL-Dynamic-Image-Generator.py
# --------------------------------------------------------------------------
"""Tkinter GUI that continuously generates images with SDXL Turbo img2img.

Each generated frame is fed back (slightly perturbed) as the input image of
the next pipeline call and blended with the new output before display.
"""

import random
import tkinter as tk
from tkinter import ttk

import numpy as np
import torch
from diffusers import AutoPipelineForImage2Image
from PIL import Image, ImageEnhance, ImageTk


class ImageGeneratorApp:
    """Main window: an output canvas plus prompt/slider controls.

    Pressing "Toggle Generation" starts or stops a loop, driven by
    ``window.after``, that calls the img2img pipeline once per tick and draws
    the result on the canvas.
    """

    MODEL_ID = "stabilityai/sdxl-turbo"
    CANVAS_SIZE = (1024, 1024)      # pixel size of the output canvas / displayed image
    INIT_IMAGE_SIZE = (512, 512)    # size of the blank image used for the first frame
    FRAME_INTERVAL_MS = 10          # delay between the end of one frame and the next
    SEED_STEP_EVERY = 20            # bump the seed slider every N generated frames
    BLEND_ALPHA = 0.1               # weight of the previous frame when blending

    def __init__(self, window, window_title):
        """Build the widgets, load the model and hook the window-close event.

        Args:
            window: The Tk root (or toplevel) that hosts the application.
            window_title: Text shown in the window's title bar.
        """
        self.window = window
        self.window.title(window_title)
        self.window.geometry('1200x1350')  # Room for the 1024px canvas and the controls

        canvas_w, canvas_h = self.CANVAS_SIZE
        self.output_canvas = tk.Canvas(window, width=canvas_w, height=canvas_h)
        self.output_canvas.grid(row=0, column=0, padx=10, pady=2)
        self._canvas_image_id = None  # Single canvas item reused for every frame

        self.controls_frame = ttk.Frame(window)
        self.controls_frame.grid(row=1, column=0, pady=2, sticky="ew")

        self.setup_ui()

        self.recording = False
        self.previous_frame = None  # Last displayed frame, fed back into img2img
        self.frame_count = 0
        self._after_id = None  # Id of the pending `after` callback, if any

        self.load_model()

        self.window.protocol("WM_DELETE_WINDOW", self.on_closing)

    def setup_ui(self):
        """Create the prompt box, the four sliders and the toggle button."""
        label_font = ('Helvetica', 16)
        style = ttk.Style()
        style.configure('W.TButton', font=label_font)

        # Default text prompt
        default_prompt = "A photograph of a water drop"

        # Text input prompt
        self.text_input_label = tk.Label(self.controls_frame, text="Prompt:", font=label_font)
        self.text_input_label.grid(row=0, column=0, padx=10, pady=10, sticky="w")

        self.text_input = tk.Text(self.controls_frame, width=40, height=2, font=label_font, wrap=tk.WORD)
        self.text_input.insert(tk.END, default_prompt)
        self.text_input.grid(row=0, column=1, padx=10, pady=10, columnspan=4, sticky="ew")

        # Sliders (ranges and defaults unchanged)
        self.strength_slider = tk.Scale(
            self.controls_frame, from_=0.0, to=1.0, resolution=0.1,
            orient=tk.HORIZONTAL, label="Strength", length=200)
        self.strength_slider.set(1.0)
        self.strength_slider.grid(row=1, column=0, padx=5, pady=5, sticky="ew")

        self.guidance_scale_slider = tk.Scale(
            self.controls_frame, from_=0.0, to=1.0, resolution=0.1,
            orient=tk.HORIZONTAL, label="Guidance Scale", length=200)
        self.guidance_scale_slider.set(1.0)
        self.guidance_scale_slider.grid(row=1, column=1, padx=5, pady=5, sticky="ew")

        self.num_steps_slider = tk.Scale(
            self.controls_frame, from_=1, to=50, resolution=1,
            orient=tk.HORIZONTAL, label="Num Inference Steps", length=200)
        self.num_steps_slider.set(2)
        self.num_steps_slider.grid(row=1, column=2, padx=5, pady=5, sticky="ew")

        self.seed_slider = tk.Scale(
            self.controls_frame, from_=0, to=10000, resolution=1,
            orient=tk.HORIZONTAL, label="Seed", length=200)
        self.seed_slider.set(1)
        self.seed_slider.grid(row=1, column=3, padx=5, pady=5, sticky="ew")

        self.btn_toggle_record = ttk.Button(
            self.controls_frame, text="Toggle Generation",
            command=self.toggle_recording, width=20, style='W.TButton')
        self.btn_toggle_record.grid(row=2, column=0, padx=10, pady=10, columnspan=4)

    def load_model(self):
        """Load the SDXL Turbo img2img pipeline onto the best available device.

        Uses CUDA with fp16 weights when a GPU is available; otherwise falls
        back to the CPU with float32 weights instead of crashing on
        ``.to("cuda")``.
        """
        if torch.cuda.is_available():
            self.device = "cuda"
            load_kwargs = {"torch_dtype": torch.float16, "variant": "fp16"}
        else:
            self.device = "cpu"
            load_kwargs = {"torch_dtype": torch.float32}
        self.pipe = AutoPipelineForImage2Image.from_pretrained(self.MODEL_ID, **load_kwargs)
        self.pipe.to(self.device)

    def toggle_recording(self):
        """Flip between the generating and idle states."""
        self.recording = not self.recording
        if self.recording:
            print("Generation started...")
            self.generate_images()
        else:
            # Drop the pending tick so a quick off/on toggle cannot leave two
            # generation loops running side by side.
            self._cancel_pending_frame()
            print("Generation stopped.")

    def generate_images(self):
        """Produce one frame and schedule the next tick while generation is on."""
        self._cancel_pending_frame()
        if not self.recording:
            return
        try:
            self.process_and_display_frame()
        except Exception:
            # The loop is not rescheduled after a failure, so make the state
            # flag agree with that before letting Tk report the error.
            self.recording = False
            raise
        self._after_id = self.window.after(self.FRAME_INTERVAL_MS, self.generate_images)

    def _cancel_pending_frame(self):
        """Cancel the scheduled `after` callback, if one is outstanding."""
        if self._after_id is not None:
            self.window.after_cancel(self._after_id)
            self._after_id = None

    def process_and_display_frame(self):
        """Generate, blend and display a single frame from the current settings.

        Does nothing when the prompt box is empty. The first frame starts from
        a blank white image; later frames start from a perturbed copy of the
        previous frame and are blended with it.
        """
        prompt = self.text_input.get("1.0", tk.END).strip()
        if not prompt:
            return

        seed = self.seed_slider.get()
        torch.manual_seed(seed)

        if self.previous_frame is None:
            # First frame: nothing to feed back yet, start from a blank image.
            blank = Image.new('RGB', self.INIT_IMAGE_SIZE, color='white')
            frame = self._run_pipeline(prompt, blank)
        else:
            perturbed = self.apply_random_perturbations(self.previous_frame)
            generated = self._run_pipeline(prompt, perturbed)
            frame = self.blend_images(self.previous_frame, generated, alpha=self.BLEND_ALPHA)

        # Both branches define `frame`, so the first frame is displayed too
        # (previously only the feedback branch assigned the displayed image).
        self.previous_frame = frame
        self.display_transformed_image(frame)

        # Advance the seed every few frames
        self.frame_count += 1
        if self.frame_count % self.SEED_STEP_EVERY == 0:
            self.seed_slider.set(seed + 1)

    def _run_pipeline(self, prompt, image):
        """Run one img2img call using the current slider values.

        Args:
            prompt: Text prompt passed to the pipeline.
            image: PIL image used as the img2img input.

        Returns:
            The first generated PIL image, or ``image`` unchanged when the
            strength slider is at 0 (no denoising steps would run).
        """
        strength = float(self.strength_slider.get())
        if strength <= 0.0:
            return image
        steps = self._effective_steps(strength, int(self.num_steps_slider.get()))
        result = self.pipe(
            prompt=prompt,
            image=image,
            strength=strength,
            guidance_scale=self.guidance_scale_slider.get(),
            num_inference_steps=steps,
        )
        return result.images[0]

    @staticmethod
    def _effective_steps(strength, requested_steps):
        """Return the smallest step count >= requested that yields >= 1 real step.

        The img2img pipeline runs ``int(num_inference_steps * strength)``
        denoising steps, so that product must be at least 1.

        Raises:
            ValueError: If ``strength`` is not positive.
        """
        if strength <= 0:
            raise ValueError("strength must be > 0")
        steps = max(1, requested_steps)
        # Loop rather than ceil(1 / strength) to stay safe against float rounding.
        while int(steps * strength) < 1:
            steps += 1
        return steps

    def apply_random_perturbations(self, image):
        """Return a copy of `image` with brightness and contrast jittered by up to 5%."""
        image = ImageEnhance.Brightness(image).enhance(1 + random.uniform(-0.05, 0.05))
        image = ImageEnhance.Contrast(image).enhance(1 + random.uniform(-0.05, 0.05))
        return image

    def blend_images(self, prev_img, curr_img, alpha=0.1):
        """Blend two frames as ``alpha * prev + (1 - alpha) * curr``.

        Args:
            prev_img: Previous frame (PIL image).
            curr_img: Newly generated frame (PIL image).
            alpha: Weight given to the previous frame.

        Returns:
            The blended PIL image, with the size and mode of `curr_img`.
        """
        # Make the previous frame match the current one so the arrays line up.
        if prev_img.mode != curr_img.mode:
            prev_img = prev_img.convert(curr_img.mode)
        if prev_img.size != curr_img.size:
            prev_img = prev_img.resize(curr_img.size, Image.LANCZOS)

        prev_array = np.array(prev_img)
        curr_array = np.array(curr_img)
        blended = alpha * prev_array + (1 - alpha) * curr_array
        # Clip before the cast so an alpha outside [0, 1] cannot wrap around.
        return Image.fromarray(np.clip(blended, 0, 255).astype(np.uint8))

    def display_transformed_image(self, transformed_image):
        """Scale `transformed_image` to the canvas size and show it."""
        photo = ImageTk.PhotoImage(transformed_image.resize(self.CANVAS_SIZE, Image.LANCZOS))
        if self._canvas_image_id is None:
            self._canvas_image_id = self.output_canvas.create_image(0, 0, image=photo, anchor=tk.NW)
        else:
            # Reuse the existing item; creating a new one per frame would pile
            # up canvas items for as long as generation runs.
            self.output_canvas.itemconfig(self._canvas_image_id, image=photo)
        self.output_canvas.image = photo  # Keep a reference so Tk does not drop the image

    def on_closing(self):
        """Stop generation, cancel the pending tick and close the window."""
        self.recording = False
        self._cancel_pending_frame()
        self.window.destroy()


def main():
    """Create the Tk root, build the application and enter the event loop."""
    root = tk.Tk()
    app = ImageGeneratorApp(root, "Image Generator App")  # noqa: F841 - keep the app alive
    root.mainloop()


if __name__ == '__main__':
    main()