├── input
│   └── .gitkeep
├── requirements.txt
├── .gitattributes
├── .gitignore
├── exceptions
│   └── argument_exception.py
├── detectors
│   ├── detector_base.py
│   └── template_detector.py
├── processor
│   └── processor_base.py
├── LICENSE
├── .vscode
│   └── launch.json
├── README.md
├── infinite_zoom.py
├── helper
│   └── image_helper.py
└── infinite_zoom_impl.py

/input/.gitkeep:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
opencv-python
screeninfo
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
# Auto detect text files and perform LF normalization
* text=auto
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.png
*.mp4
*.pyc
.vs/*
*.zip
*.jpg
*.webm
--------------------------------------------------------------------------------
/exceptions/argument_exception.py:
--------------------------------------------------------------------------------
class ArgumentException(Exception):
    def __init__(self, msg: str):
        super().__init__(msg)
--------------------------------------------------------------------------------
/detectors/detector_base.py:
--------------------------------------------------------------------------------
from abc import ABC, abstractmethod


class DetectorBase(ABC):
    def __init__(self, name):
        self.__name = name

    @property
    def name(self):
        return self.__name

    @abstractmethod
    def search(self, image):
        pass
--------------------------------------------------------------------------------
/processor/processor_base.py:
--------------------------------------------------------------------------------
from abc import ABC, abstractmethod
import numpy as np


class ProcessorBase(ABC):
    def __init__(self, name):
        self._name = name

    @property
    def name(self):
        return self._name

    @abstractmethod
    def process(self, image: np.ndarray) -> np.ndarray:
        pass
--------------------------------------------------------------------------------
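No concrete `ProcessorBase` implementation ships with the repository; `read_image` in `helper/image_helper.py` accepts one processor or a list of them to preprocess images on load. A minimal, hypothetical example implementation could look like this (the class name and conversion are illustrative, not part of the repo):

```python
import cv2
import numpy as np

from processor.processor_base import ProcessorBase


class GrayscaleProcessor(ProcessorBase):
    """Hypothetical example processor: converts loaded images to grayscale."""

    def __init__(self):
        super().__init__("GrayscaleProcessor")

    def process(self, image: np.ndarray) -> np.ndarray:
        return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
```

It would be used like `read_image("input/sample/frame_01.png", GrayscaleProcessor())`.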
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2023 Ingo Berg

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Python: Current File",
            "type": "python",
            "request": "launch",
            "program": "${file}",
            "console": "integratedTerminal",
            "justMyCode": true,
            "args": [
                "-zf", "2",
                "-zs", "100",
                "-zc", "0.8",
                "-d", "1.0",
                // "-i", "input/sample_1.33x",
                // "-i", "./input/sample_fairytale",
                // "-i", "./input/sample_dark",
                // "-i", "./input/sample_nostradamus",
                "-i", "./input/sample_nostradamus",
                // "-i", "./input/lexica",
                // "-i", "./input/sample_pirate",
                // "-i", "./input/sample2a",
                // "-i", "./input/sample_ps2",
                // "-as",
                "-rev",
                // "-dbg",
                // "-o", "output_frames/"]
                "-o", "myoutput.mp4"]
        }
    ]
}
--------------------------------------------------------------------------------
/detectors/template_detector.py:
--------------------------------------------------------------------------------
import numpy as np
import cv2

from detectors.detector_base import DetectorBase


class TemplateDetector(DetectorBase):
    def __init__(self, threshold=0.7, max_num=-1, method=cv2.TM_CCORR_NORMED):
        super(TemplateDetector, self).__init__("TemplateDetector")

        if method in [cv2.TM_CCOEFF, cv2.TM_CCORR, cv2.TM_SQDIFF]:
            raise Exception("search requires a normalized matching method!")

        self.__method = method
        self.__threshold = threshold
        self.__max_num = max_num  # maximum number of matches; a non-positive value means no limit

    @property
    def threshold(self):
        return self.__threshold

    @threshold.setter
    def threshold(self, value):
        self.__threshold = value

    @property
    def pattern(self):
        return self.__pattern

    @pattern.setter
    def pattern(self, pat):
        self.__pattern = pat
        self.__height, self.__width = self.__pattern.shape[:2]

    @property
    def max_num(self):
        return self.__max_num

    @max_num.setter
    def max_num(self, value):
        self.__max_num = value

    def load(self, file):
        self.__pattern = cv2.imread(file)
        self.__height, self.__width = self.__pattern.shape[:2]

    def search(self, image: np.ndarray, threshold: float = None):
        if image is None:
            raise Exception('Image is null!')

        if self.__method in [cv2.TM_CCOEFF, cv2.TM_CCORR, cv2.TM_SQDIFF]:
            raise Exception("search requires a normalized matching method!")

        # if the search image is grayscale, convert the pattern to grayscale too
        num_channels = len(image.shape)
        if num_channels == 2:
            self.__pattern = cv2.cvtColor(self.__pattern, cv2.COLOR_BGR2GRAY)

        # match the pattern dtype to the search image (float images are expected in [0, 1])
        if image.dtype.name == 'float32' and self.__pattern.dtype.name == 'uint8':
            self.__pattern = np.float32(self.__pattern)
            self.__pattern = self.__pattern / 255.0

        res = cv2.matchTemplate(image, self.__pattern, self.__method)
        if self.__method in [cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED]:
            # invert so that higher values always mean better matches
            res = 1 - res

        result_copy = res.copy()

        if threshold is None:
            threshold = self.__threshold

        img_height, img_width = image.shape[:2]

        max_val = 1
        rects = []

        ct = 0
        while max_val > threshold:
            min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
            ct += 1

            if self.__max_num > 0 and ct > self.__max_num:
                break

            x, y = max_loc
            if max_val > threshold:
                # blank out the region around the match so that the next
                # iteration finds the next best match
                h1 = np.clip(max_loc[1] - self.__height//2, 0, img_height)
                h2 = np.clip(max_loc[1] + self.__height//2 + 1, 0, img_height)

                w1 = np.clip(max_loc[0] - self.__width//2, 0, img_width)
                w2 = np.clip(max_loc[0] + self.__width//2 + 1, 0, img_width)
                res[h1:h2, w1:w2] = 0

                # note: the match result map is smaller than the search image by the
                # size of the pattern, therefore pattern size/2 needs to be added.
                rects.append((int(x + self.__width//2), int(y + self.__height//2), self.__width, self.__height, max_val, 0))

        return np.array(rects), result_copy
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Creating an "Infinite Zoom" from AI-Outpainted images
The Python command line script published here can turn a series of AI-generated images into a zoom animation. For more details have a look at my web page where I explain the inner workings in detail:

* In German: https://beltoforion.de/de/infinite_zoom
* In English: https://beltoforion.de/en/infinite_zoom

Here is an example video created by the script:

https://github.com/beltoforion/ai_ever_zoom/assets/2202567/78bcbe99-8dbb-48d7-88bf-f8f400ed10c9

## What is AI Outpainting?
Outpainting is a technique where you zoom out of an image by a certain factor while letting a generative AI
fill in the newly created empty edge. By giving the AI new prompts, you can control the evolution of the scene
as you zoom out.

![outpainting_example](https://github.com/beltoforion/ai_ever_zoom/assets/2202567/206d4f06-6a9b-4b9b-8377-131a319d2457)

AI outpainting requires the use of a generative AI for images and can be done with Midjourney, Dall-E or Photoshop
(whose generative AI is currently only in beta). I have only tested this command line script on Midjourney images as they are
the easiest to create. In principle, this program will work with any outpainted image set.

## Preparing the images
Before you start you need a set of outpainted AI images. Copy this set into a separate folder inside the "input" folder. It is best
to order the images in the folder by giving them sequential names (i.e. "frame_01.png", "frame_02.png", ..., "frame_10.png").

Alternatively you can use the "-as" option to let the script figure out the image order for you, but this will take some time as
each image is matched against all other images to determine their relations automatically.

* Create a set of outpainted images with the generative AI of your choice.
  + The first image is the innermost image of the series.
  + The image series must be zoomed with respect to the center.
  + The entire image series must use the same zoom factor (i.e. 2x).
* Rename and order the image sequence by giving them sequential names (i.e. "frame_01.png", "frame_02.png"); a renaming sketch follows below.
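If you want to script the renaming, here is a minimal sketch. It assumes the files already sort correctly by their current names (adjust the sort key otherwise); the folder name is a placeholder:

```python
from pathlib import Path

# Rename all images in a folder to frame_01.png, frame_02.png, ...
folder = Path("input/my_zoom_series")
images = sorted(p for p in folder.iterdir() if p.suffix.lower() in (".png", ".jpg", ".jpeg"))
for i, path in enumerate(images, start=1):
    path.rename(folder / f"frame_{i:02d}{path.suffix.lower()}")
```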
## Usage

You need Python to execute this script. Put your input images into a folder and then run the script on the content of this folder.

```bash
python ./infinite_zoom.py -zf 2 -zs 100 -zc 0.8 -i ./samples_ps -o video.mp4
```

Here is an example that dumps the frames without creating a video file:

```bash
python3 ./infinite_zoom.py -as -i ./sample_fairytale -o myframes/
```
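If you want to render several input series in one go, you can drive the script from a small wrapper. This is only a sketch, not part of the repository; it assumes every subfolder of `./input` contains one image series:

```python
import subprocess
from pathlib import Path

# Render every series below ./input into its own mp4 file.
for folder in sorted(p for p in Path("input").iterdir() if p.is_dir()):
    subprocess.run(
        ["python", "infinite_zoom.py", "-zf", "2", "-i", str(folder), "-o", f"{folder.name}.mp4"],
        check=True,
    )
```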
## Command Line Options

-zf<br/>
Zoom factor used for creating the outpainted image sequence. For image sequences created by Midjourney use either "2" or "1.333". (Midjourney incorrectly states that its low zoom level is 1.5, but it is actually 1.333.) For an image series created with Lexica use 1.5. If you are unsure, compute the zoom factor yourself by taking two successive images of the series and matching them manually in the image processing software of your choice, or use the sketch after this list. If the zoom factor is incorrect this script cannot work properly!

-zs<br/>
Number of zoom steps for each image.

-zc<br/>
Crop zoomed images by this factor. Midjourney takes some liberties in modifying the edge regions between zoom steps, so they may not match perfectly.

-i<br/>
Path to the folder with the input images.

-o<br/>
Name of the output folder or file. Must either be a valid file name with an mp4 extension or a folder name. If no extension is given, it is assumed to be a folder name and the output will consist of the frame dump instead of a single video file.

-as<br/>
Automatically sort the input images. If you use this option you can name the images arbitrarily; the script will figure out the right order.

-dbg<br/>
Show debug overlays and display debug output for each auto sort step.

-d<br/>
Start/stop delay in seconds. The first and last frame of the video are held for this duration.

-rev<br/>
Reverse the video. This will create a zoom-out effect.

-fps<br/>
Set the target framerate of the output video.
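If you want to determine the zoom factor programmatically rather than by eye, a brute-force estimate can be computed with OpenCV by shrinking the earlier image at candidate factors and template matching it against its outpainted successor. This sketch is not part of the script, and the file names are placeholders:

```python
import cv2
import numpy as np

# Estimate the zoom factor between two successive images of a series.
img_inner = cv2.imread("frame_01.png")   # the earlier (inner) image
img_outer = cv2.imread("frame_02.png")   # its outpainted follow-up

best_zf, best_score = None, -1.0
h, w = img_inner.shape[:2]
for zf in np.arange(1.2, 2.6, 0.02):
    # at the correct factor, the shrunken inner image matches the center of the outer one
    templ = cv2.resize(img_inner, (int(w / zf), int(h / zf)))
    score = cv2.minMaxLoc(cv2.matchTemplate(img_outer, templ, cv2.TM_CCOEFF_NORMED))[1]
    if score > best_score:
        best_zf, best_score = zf, score

print(f"estimated zoom factor: {best_zf:.2f} (match score: {best_score:.2f})")
```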
--------------------------------------------------------------------------------
/infinite_zoom.py:
--------------------------------------------------------------------------------
import infinite_zoom_impl as izoom
import argparse
import pathlib

from pathlib import Path


def valid_crop_range(x):
    try:
        x = float(x)
    except ValueError:
        raise argparse.ArgumentTypeError(f'{x} is not a floating-point literal')

    if x < 0.1 or x > 0.95:
        raise argparse.ArgumentTypeError(f'{x:0.2f} not in range [0.1, 0.95]')

    return x


def main():
    parser = argparse.ArgumentParser(description='AI Outpainting Zoom Generator - Turn an AI generated image series into an animation')
    parser.add_argument('-zf', '--ZoomFactor', dest='zoom_factor', help='The outpainting zoom factor set up when creating the image sequence.', required=False, type=float, default=2)
    parser.add_argument('-zs', '--ZoomSteps', dest='zoom_steps', help='The number of zoom steps to be generated between two successive images.', required=False, type=int, default=100)
    parser.add_argument('-zc', '--ZoomCrop', dest='zoom_crop', help="Set the crop factor of each zoom step's follow-up image. This is helpful to hide image variations at the edges.", required=False, type=valid_crop_range, default=0.8)
    parser.add_argument('-o', '--Output', dest='output', help='Name of the output file or folder. If this is a folder name the output will consist of the frames. Otherwise it is assumed to be the name of the mp4 output file.', required=False, type=str, default='output.mp4')
    parser.add_argument('-i', '--Input', dest='input_folder', help='Path to the folder containing the input images.', required=True, type=str)
    parser.add_argument('-as', '--AutoSort', dest='auto_sort', help='Input images are unsorted, automatically sort them.', required=False, action='store_true', default=False)
    parser.add_argument('-dbg', '--Debug', dest='debug', help='Enable debug aids.', required=False, action='store_true', default=False)
    parser.add_argument('-d', '--Delay', dest='delay', help='Start/stop delay in seconds.', required=False, type=float, default=0.0)
    parser.add_argument('-rev', '--Reverse', dest='reverse', help='Reverse the output video.', required=False, action='store_true', default=False)
    parser.add_argument('-fps', '--FramesPerSecond', dest='fps', help='Frames per second of the output video.', required=False, type=float, default=60.0)

    args = parser.parse_args()

    print('\r\n')
    print('AI Outpainting Zoom Video Generator')
    print('-----------------------------------')
    print(f' - input folder: "{args.input_folder}"')
    print(f' - output: "{args.output}"')
    print(f' - fps: {args.fps}')
    print(f' - zoom factor: {args.zoom_factor}')
    print(f' - zoom steps: {args.zoom_steps}')
    print(f' - zoom crop: {args.zoom_crop}')
    print(f' - auto sort: {args.auto_sort}')
    print(f' - debug aids: {args.debug}')
    print(f' - delay: {args.delay}')
    print(f' - reverse: {args.reverse}')

    param = izoom.InfiniZoomParameter()
    param.reverse = args.reverse
    param.auto_sort = args.auto_sort
    param.debug_mode = args.debug
    param.zoom_image_crop = args.zoom_crop
    param.zoom_steps = args.zoom_steps
    param.zoom_factor = args.zoom_factor  # the zoom factor used by Midjourney
    param.input_path = Path(args.input_folder)
    param.delay = args.delay
    param.fps = args.fps

    # when no extension is present we assume it is an output folder. In this
    # case no video is created and all frames are saved into the output folder.
    output_ext = pathlib.Path(args.output).suffix
    if output_ext == '':
        if not pathlib.Path(args.output).is_dir():
            print(f"\nCreating frame output folder: {args.output}")
            pathlib.Path(args.output).mkdir(parents=True, exist_ok=True)

        param.output_frames = True
        param.output_file = None
        param.output_folder = args.output
    else:
        param.output_frames = False
        param.output_file = args.output  # name of the output video file
        param.output_folder = None

    iz = izoom.InfiniZoom(param)
    iz.process()

if __name__ == "__main__":
    main()
--------------------------------------------------------------------------------
/helper/image_helper.py:
--------------------------------------------------------------------------------
import numpy as np
import pathlib
import cv2
from typing import Union

from processor.processor_base import *


def crop_image(image, crop_size):
    """ Crop an image.

    The crop is centered on the image.
    """
    h, w = image.shape[:2]
    cx = w // 2
    cy = h // 2

    start_x = cx - crop_size[0]//2
    start_y = cy - crop_size[1]//2

    end_x = start_x + crop_size[0]
    end_y = start_y + crop_size[1]

    cropped_image = image[start_y:end_y, start_x:end_x]

    return cropped_image


def draw_cross(image, center, size, color=(0, 255, 0), thickness=2):
    """
    Draw a cross on the given image at the specified center coordinates with the given size, color, and thickness.

    Parameters:
    image (numpy.ndarray): The image on which to draw the cross.
    center (tuple): The center coordinates of the cross in (x, y) format.
    size (int): The size of the cross.
    color (tuple): The color of the cross in BGR format. Default is green (0, 255, 0).
    thickness (int): The thickness of the cross lines. Default is 2.
    """
    x, y = center
    half_size = size // 2

    cv2.line(image, (x - half_size, y), (x + half_size, y), color, thickness)
    cv2.line(image, (x, y - half_size), (x, y + half_size), color, thickness)


def read_image(file: str, processor: Union[ProcessorBase, list] = None):
    """ Read an image file.

    The image is preprocessed with a single processor or a list of processors.
    Returns the processed image and an unmodified copy of the original.
    """
    image: np.ndarray = cv2.imread(file)
    if image is None:
        return None, None

    original_image = image.copy()

    if type(processor) is list:
        for p in processor:
            image = p.process(image)
    elif isinstance(processor, ProcessorBase):
        image = processor.process(image)
    elif processor is None:
        pass

    return image, original_image


def read_images_folder(path: pathlib.Path):
    pathlist = sorted(path.glob('*.*'))

    images = []

    for path in pathlist:
        if not path.suffix.lower() in ['.png', '.jpg', '.jpeg']:
            continue

        img_orig, _ = read_image(str(path), None)
        if img_orig is not None:
            images.append(img_orig)

    if len(images) > 0:
        first_image_shape = images[0].shape
        if not all(image.shape == first_image_shape for image in images):
            raise Exception(f"Reading images failed because not all images in the folder have equal size! Expected image size is {first_image_shape[:2]}.")
    return np.array(images)


def create_radial_mask(h, w, inner_radius_fraction=0.4, outer_radius_fraction=1.0):
    center = (int(w / 2), int(h / 2))
    inner_radius = int(min(center[0], center[1]) * inner_radius_fraction)
    outer_radius = int(min(center[0], center[1]) * outer_radius_fraction)

    Y, X = np.ogrid[:h, :w]
    dist_from_center = np.sqrt((X - center[0]) ** 2 + (Y - center[1]) ** 2)

    # 1 inside the inner radius, 0 outside the outer radius, linear falloff in between
    mask = np.zeros((h, w))
    mask[dist_from_center <= inner_radius] = 1.0
    mask[dist_from_center >= outer_radius] = 0.0

    transition = np.logical_and(dist_from_center > inner_radius, dist_from_center < outer_radius)
    mask[transition] = (outer_radius - dist_from_center[transition]) / (outer_radius - inner_radius)

    return mask


def overlay_images(background, foreground, position, relative_to='corner', opacity=1):
    # Get the dimensions of the foreground image
    fh, fw, _ = foreground.shape

    # Create an alpha mask of the same size as the foreground image
    mask = create_radial_mask(fh, fw, 0.4, 1 + opacity)

    # Convert foreground to float and normalize
    foreground = foreground.astype(float) / 255

    # Create a 4-channel image (RGB + alpha) for the foreground
    foreground_alpha = np.dstack([foreground, mask])

    # Get the position
    x, y = position

    # If position is relative to the center, adjust the position
    if relative_to == 'center':
        y = background.shape[0]//2 - fh//2 + y
        x = background.shape[1]//2 - fw//2 + x

    # Calculate the overlay region
    overlay_x_start = max(x, 0)
    overlay_y_start = max(y, 0)
    overlay_x_end = min(x+fw, background.shape[1])
    overlay_y_end = min(y+fh, background.shape[0])

    # Calculate the region of the foreground to be overlayed
    foreground_x_start = max(0, -x)
    foreground_y_start = max(0, -y)
    foreground_x_end = min(fw, overlay_x_end - x)
    foreground_y_end = min(fh, overlay_y_end - y)

    # Prepare the overlay with the correct opacity
    foreground_region = foreground_alpha[foreground_y_start:foreground_y_end, foreground_x_start:foreground_x_end]
    background_region = background[overlay_y_start:overlay_y_end, overlay_x_start:overlay_x_end] / 255

    overlay = (foreground_region[..., :3] * foreground_region[..., 3:4] * opacity +
               background_region * (1 - foreground_region[..., 3:4] * opacity)) * 255

    # Overlay the appropriately sized and positioned region of the foreground onto the background
    background[overlay_y_start:overlay_y_end, overlay_x_start:overlay_x_end] = overlay.astype(np.uint8)

    return background
--------------------------------------------------------------------------------
/infinite_zoom_impl.py:
--------------------------------------------------------------------------------
from pathlib import Path
from exceptions.argument_exception import ArgumentException
from detectors.template_detector import *

import helper.image_helper as ih
import cv2
import screeninfo
import math
import time


class InfiniZoomParameter:
    def __init__(self):
        self.__zoom_steps = 100
        self.__reverse = False
        self.__auto_sort = False
        self.__zoom_image_crop = 0.8
        self.__zoom_factor = 2
        self.__debug_mode = False
        self.__fps = 60
        self.__delay = 0.0
        self.__output_frames = False
        self.__output_folder = ''
        self.__output_file = ''

    @property
    def output_frames(self):
        return self.__output_frames

    @output_frames.setter
    def output_frames(self, value: bool):
        self.__output_frames = value

    @property
    def fps(self):
        return self.__fps

    @fps.setter
    def fps(self, value: float):
        self.__fps = value

    @property
    def delay(self):
        return self.__delay

    @delay.setter
    def delay(self, value: float):
        self.__delay = value

    @property
    def debug_mode(self):
        return self.__debug_mode

    @debug_mode.setter
    def debug_mode(self, state: bool):
        self.__debug_mode = state

    @property
    def zoom_factor(self):
        return self.__zoom_factor

    @zoom_factor.setter
    def zoom_factor(self, f: float):
        self.__zoom_factor = f

    @property
    def zoom_image_crop(self):
        return self.__zoom_image_crop

    @zoom_image_crop.setter
    def zoom_image_crop(self, crop: float):
        self.__zoom_image_crop = crop

    @property
    def reverse(self):
        return self.__reverse

    @reverse.setter
    def reverse(self, stat: bool):
        self.__reverse = stat

    @property
    def auto_sort(self):
        return self.__auto_sort

    @auto_sort.setter
    def auto_sort(self, stat: bool):
        self.__auto_sort = stat

    @property
    def zoom_steps(self):
        return self.__zoom_steps

    @zoom_steps.setter
    def zoom_steps(self, steps: int):
        if steps < 1:
            raise ArgumentException("Range error: steps must be greater than 0")

        self.__zoom_steps = steps

    @property
    def input_path(self):
        return self.__input_path

    @input_path.setter
    def input_path(self, path: Path):
        self.__input_path = path

    @property
    def output_file(self):
        return self.__output_file

    @output_file.setter
    def output_file(self, file):
        self.__output_file = file

    @property
    def output_folder(self):
        return self.__output_folder

    @output_folder.setter
    def output_folder(self, folder):
        self.__output_folder = folder


class InfiniZoom:
    def __init__(self, param: InfiniZoomParameter):
        self.__param = param
        self.__image_list = []
        self.__video_writer = None
        self.__frames = []

        self.__font = cv2.FONT_HERSHEY_DUPLEX
        self.__fontScale = 0.6
        self.__fontThickness = 1
        self.__fontLineType = 1

        # get screen resolution
        screen = screeninfo.get_monitors()[0]
        self.__screen_width = screen.width
        self.__screen_height = screen.height


    def __load_images(self):
        if not self.__param.input_path.exists():
            raise Exception("input path does not exist")

        print(f'\nReading images from "{str(self.__param.input_path)}"')
        self.__image_list = ih.read_images_folder(self.__param.input_path)
        print(f' - {len(self.__image_list)} images read\n')


    def __print_matrix(self, matrix):
        rows, cols = matrix.shape
        for i in range(rows):
            for j in range(cols):
                if matrix[i, j] == 0:
                    print(' -- ', end=" ")
                else:
                    print(f'{matrix[i, j]:.2f}', end=" ")

            print()


    def __auto_sort(self):
        print('Determining image order')

        # ibg 2023-08-19: #1
        # changed method to TM_CCOEFF_NORMED from TM_CCORR_NORMED because the latter
        # one failed with some images. Well, it did not really fail, but it found a
        # false match to the first image in the series with a score of 0.92 (all other
        # matches had a clean 1.0).
        detector = TemplateDetector(threshold=0.01, max_num=1, method=cv2.TM_CCOEFF_NORMED)

        num = len(self.__image_list)
        scores = np.zeros((num, num))

        prog = 0
        # collect the debug overlay frames so they can be written to a video later
        debug_frames = [] if self.__param.debug_mode else None

        for i in range(0, num):
            max_score = 0
            best_match = None

            for j in range(0, num):
                if i == j:
                    continue

                prog += 1
                print(f' - matching images {100*prog/(num*num-num):.0f} % ', end='\r')

                img1 = self.__image_list[i].copy()
                img2 = self.__image_list[j].copy()
                if img1.shape != img2.shape:
                    raise Exception("Auto sort failed: Inconsistent image sizes!")

                h, w = img1.shape[:2]

                # scale image j down by the series zoom factor; if it is the
                # predecessor of image i it will match the center of image i
                mtx_scale = cv2.getRotationMatrix2D((0, 0), 0, 1/self.__param.zoom_factor)
                img2 = cv2.warpAffine(img2, mtx_scale, (int(w*1/self.__param.zoom_factor), int(h*1/self.__param.zoom_factor)))

                detector.pattern = img2
                result, result_img = detector.search(img1)

                if len(result) == 0:
                    print(f'Correlating image {i} with image {j}: Cannot find any related image. The series zoom factor is incorrect or you have unrelated images in the input folder!')
                    continue
                else:
                    bx, by, bw, bh, score = result[0, :5]

                if score > max_score:
                    max_score = score
                    best_match = self.__image_list[j].copy()

                if self.__param.debug_mode:
                    # convert result_img to 8 bit
                    result_img = np.clip(result_img * 255, 0, 255).astype(np.uint8)

                    # copy correlation image centered into an image with the same size
                    # as the original
                    corr_result = np.zeros(img1.shape, np.uint8)
                    rh, rw = result_img.shape[0:2]
                    corr_result[rh//2:(rh//2)+rh, rw//2:(rw//2)+rw, :] = result_img[..., np.newaxis]

                    overview_image = np.zeros((h*2, w*2, 3), np.uint8)
                    overview_image[0:h, 0:w, :] = img1
                    overview_image[0:h, w:w+best_match.shape[1], :] = best_match

                    overview_image[h:h+img2.shape[0], 0:img2.shape[1], :] = img2
                    overview_image[h:h+corr_result.shape[0], w:w+corr_result.shape[1], :] = corr_result

                    max_width = 1200
                    scale = max_width / overview_image.shape[1]
                    new_width = int(overview_image.shape[1] * scale)
                    new_height = int(overview_image.shape[0] * scale)

                    overview_image = cv2.resize(overview_image, (new_width, new_height))
                    ho, wo = overview_image.shape[:2]
                    cv2.putText(overview_image, f'series image {i} of {num}; progress is {100*prog/(num*num-num):.0f} %', (20, 20), self.__font, self.__fontScale, (0,255,0), self.__fontThickness, self.__fontLineType)
                    cv2.putText(overview_image, f'best match so far; score={max_score:.2f}', (wo//2 + 20, 20), self.__font, self.__fontScale, (0,255,0), self.__fontThickness, self.__fontLineType)
                    cv2.putText(overview_image, 'normalized cross correlation', (wo//2 + 20, ho//2+20), self.__font, self.__fontScale, (0,255,0), self.__fontThickness, self.__fontLineType)
                    cv2.putText(overview_image, f'candidate {j}', (20, ho//2+20), self.__font, self.__fontScale, (0,255,0), self.__fontThickness, self.__fontLineType)

                    if debug_frames is not None:
                        debug_frames.append(overview_image.copy())

cv2.imshow("Finding image order", self.__downscale_to_screen(overview_image, 1920, 1080)) 247 | cv2.waitKey(10) 248 | 249 | scores[i, j] = score 250 | 251 | cv2.waitKey() 252 | 253 | if debug_frames!=None: 254 | time.sleep(1) 255 | 256 | for i in range(0,20): 257 | debug_frames.append(overview_image.copy()) 258 | 259 | # process the data to find the best matches for each image 260 | self.__image_list = self.__filter_array(scores) 261 | cv2.destroyAllWindows() 262 | 263 | if debug_frames!=None: 264 | vh, vw = overview_image.shape[:2] 265 | self.__video_writer = cv2.VideoWriter("debug.mp4", cv2.VideoWriter_fourcc(*'mp4v'), 10, (vw, vh)) 266 | 267 | for frame in debug_frames: 268 | self.__video_writer.write(frame) 269 | 270 | self.__video_writer.release() 271 | 272 | 273 | def __downscale_to_screen(self, img, screen_width, screen_height): 274 | """ 275 | Downscale an image so that it fits the screen dimensions while maintaining its aspect ratio. 276 | 277 | Args: 278 | - img (numpy.ndarray): The input image. 279 | - screen_width (int): The width of the screen. 280 | - screen_height (int): The height of the screen. 281 | 282 | Returns: 283 | - numpy.ndarray: The downscaled image. 284 | """ 285 | # Obtain the width and height of the image 286 | img_height, img_width = img.shape[:2] 287 | 288 | # Determine the aspect ratio of the image 289 | aspect_ratio = img_width / img_height 290 | 291 | # Calculate the dimensions if we were to fit by width 292 | new_width_by_w = screen_width 293 | new_height_by_w = int(screen_width / aspect_ratio) 294 | 295 | # Calculate the dimensions if we were to fit by height 296 | new_width_by_h = int(screen_height * aspect_ratio) 297 | new_height_by_h = screen_height 298 | 299 | # Choose the dimensions that fit within the screen 300 | if new_width_by_w <= screen_width and new_height_by_w <= screen_height: 301 | new_width, new_height = new_width_by_w, new_height_by_w 302 | else: 303 | new_width, new_height = new_width_by_h, new_height_by_h 304 | 305 | # Resize the image 306 | resized_img = cv2.resize(img, (new_width, new_height)) 307 | 308 | return resized_img 309 | 310 | 311 | def __filter_array(self, arr): 312 | filtered = np.zeros(arr.shape) 313 | 314 | # Get the indices of the row and column maxima 315 | row_max_indices = np.argmax(arr, axis=1) 316 | col_max_indices = np.argmax(arr, axis=0) 317 | 318 | # We need to find the first image of the series now. To do this we must check the 319 | # images that could not be matched as a follow up image to any of the images. 320 | # This could be images that were added accidentally but the first image is also unlinked! 321 | unlinked_images = [] 322 | for r in range(arr.shape[0]): 323 | # index of the maximum value of row r 324 | col_max = row_max_indices[r] 325 | 326 | # is this also the column maximum? 327 | if col_max_indices[col_max]==r: 328 | filtered[r, col_max] = arr[r, col_max] 329 | else: 330 | unlinked_images.append(r) 331 | 332 | # Print the image connection matrix 333 | print(f'\n\nImage relation matrix:') 334 | self.__print_matrix(filtered) 335 | 336 | # Now eliminate all unlinked images and find the first one: 337 | num_unlinked = len(unlinked_images) 338 | # print(f' - found {num_unlinked} unlinked images.') 339 | if num_unlinked == 0: 340 | print(f' - Warning: Cannot identify the first frame! 
        if num_unlinked > 1:
            print(f' - Warning: Your series contains {num_unlinked-1} images that cannot be matched!')

        print('\nFinding first image:')
        start_candidates = []
        for i in range(0, len(unlinked_images)):
            idx = unlinked_images[i]
            col = filtered[:, idx]
            if np.any(col != 0):
                print(f' - Image {idx} is the start of a zoom series')
                start_candidates.append(idx)
            else:
                print(f' - Discarding image {idx} because it is unconnected to the other images!')

        if len(start_candidates) == 0:
            raise Exception("Aborting: Could not find start image!")

        if len(start_candidates) > 1:
            raise Exception(f'Check the zoom factor! If you are sure the zoom factor is correct, clean up the image series! I found {len(start_candidates)} different images that could be the starting image. This can happen if the zoom factor is off or if the series contains multiple images for the same zoom step.')

        # finally build the sorted image list
        sequence_order = self.__assemble_image_sequence(start_candidates[0], filtered)
        print(f' - Image sequence is {",".join(map(str, sequence_order))}')

        sorted_image_list = []
        for idx in sequence_order:
            sorted_image_list.append(self.__image_list[idx])

        return sorted_image_list


    def __assemble_image_sequence(self, start: int, conn_matrix):
        series = []

        next_image_index = start
        series.insert(0, next_image_index)

        non_zeros = np.nonzero(conn_matrix[:, next_image_index])[0]
        while len(non_zeros) > 0:
            next_image_index = non_zeros[0]
            series.insert(0, next_image_index)

            non_zeros = np.nonzero(conn_matrix[:, next_image_index])[0]

        return series


    def process(self):
        self.__load_images()

        if len(self.__image_list) == 0:
            raise Exception("Processing failed: Image list is empty!")

        if self.__param.auto_sort:
            self.__auto_sort()

        h, w = self.__image_list[0].shape[:2]

        video_w = int(w * self.__param.zoom_image_crop)
        video_h = int(h * self.__param.zoom_image_crop)

        self.__frames = []

        print('Generating Zoom Sequence')
        for i in range(len(self.__image_list)-1):
            img1 = self.__image_list[i]
            img2 = self.__image_list[i+1]

            self.zoom_in(img1, img2, video_w, video_h)

        cv2.destroyAllWindows()

        if self.__param.output_frames:
            self.__save_frames()
        else:
            self.__create_video(video_w, video_h)

        print('\nDone\n')


    def __save_frames(self):
        print(f'Saving frames to output folder {self.__param.output_folder} ')

        if self.__param.reverse:
            frames = reversed(self.__frames)
        else:
            frames = self.__frames

        ct = 0
        for frame in frames:
            print(f' - Saving frame {ct}', end='\r')
            cv2.imwrite(f'{self.__param.output_folder}/frame_{ct:05d}.png', frame)
            ct += 1


    def __create_video(self, video_w, video_h):
        print(f'Creating output file {self.__param.output_file} ')
        self.__video_writer = cv2.VideoWriter(self.__param.output_file, cv2.VideoWriter_fourcc(*'mp4v'), self.__param.fps, (video_w, video_h))

        num_stills = int(self.__param.delay * self.__param.fps)

        if self.__param.reverse:
            frames = reversed(self.__frames)
        else:
            frames = self.__frames

        ct = 0
        for frame in frames:
            # write the first and the last frame multiple times to create the
            # start/stop delay; every frame is written at least once
            if ct == 0 or ct == len(self.__frames)-1:
                repeat = max(1, num_stills)
            else:
                repeat = 1

            for _ in range(repeat):
                self.__video_writer.write(frame)

            ct += 1

        self.__video_writer.release()


    def __show_error_images(self, img_curr, img_next, text):
        combined_image = cv2.hconcat([img_curr, img_next])

        h, w = combined_image.shape[:2]
        scale_factor = min(self.__screen_width / w, self.__screen_height / h)
        scale_factor = min(1, scale_factor)

        new_width = int(combined_image.shape[1] * scale_factor)
        new_height = int(combined_image.shape[0] * scale_factor)
        combined_image = cv2.resize(combined_image, (new_width, new_height))

        for i, line in enumerate(text.split('\n')):
            cv2.putText(
                combined_image,
                line,
                (20, 20 + i*20),
                self.__font,
                self.__fontScale,
                (0,255,0),
                self.__fontThickness,
                self.__fontLineType)

        cv2.imshow("Image misalignment error", combined_image)
        cv2.waitKey(0)


    def zoom_in(self, imgCurr, imgNext, video_w, video_h):
        zoom_steps = self.__param.zoom_steps

        # imgNext shows the same scene as imgCurr but zoomed out by the series
        # zoom factor (i.e. for a factor of 2 it covers twice the width and height)
        h, w = imgCurr.shape[:2]
        cx = w // 2
        cy = h // 2

        # compute the step size for each partial image zoom. Zooming is an exponential
        # process, so we need to compute the steps on a logarithmic scale:
        # f**zoom_steps == zoom_factor (e.g. zoom_factor=2, zoom_steps=100 -> f ~ 1.00696)
        f = math.exp(math.log(self.__param.zoom_factor)/zoom_steps)

        # copy images because we will modify them
        img_curr = imgCurr.copy()
        img_next = imgNext.copy()

        display_scale = min(self.__screen_width / w, self.__screen_height / h)
        display_scale = min(1, display_scale)

        # Do the zoom
        for i in range(0, zoom_steps):
            zoom_factor = f**i

            # zoom the outer image
            mtx_curr = cv2.getRotationMatrix2D((cx, cy), 0, zoom_factor)
            img_curr = cv2.warpAffine(imgCurr, mtx_curr, (w, h))

            # zoom the inner image; its zoom factor is smaller than that of the
            # outer image by the series zoom factor
            mtx_next = cv2.getRotationMatrix2D((cx, cy), 0, zoom_factor/self.__param.zoom_factor)
            img_next = cv2.warpAffine(imgNext, mtx_next, (w, h))

            # The zoomed inner image now has the same size as the outer image but is
            # padded with black pixels. We need to crop it to the proper size.
            ww = round(w * (zoom_factor/self.__param.zoom_factor))
            hh = round(h * (zoom_factor/self.__param.zoom_factor))

            # We can't use the entire image because close to the edges
            # Midjourney takes liberties with the content, so we crop
            # the inner image. (I also tried soft blending but cropping
            # looked better.)
            ww = int(ww * self.__param.zoom_image_crop)
            hh = int(hh * self.__param.zoom_image_crop)
            img_next = ih.crop_image(img_next, (ww, hh))

            if i == 0:
                # The second image may not be perfectly centered. We need to determine
                # the image offset to compensate.
                detector = TemplateDetector(threshold=0.3, max_num=1, method=cv2.TM_CCOEFF_NORMED)
                detector.pattern = img_next
                result, result_image = detector.search(img_curr)

                if len(result) == 0:
                    text = 'Error: Cannot match the following two images!'
                    self.__show_error_images(imgCurr, imgNext, text)
                    raise Exception("Cannot match image to precursor!")

                # this is the "true" position that the inner image must
                # have to match perfectly onto the outer one. Theoretically
                # it should always be centered on the outer image, but
                # Midjourney takes some liberties here and there may be
                # a significant offset (i.e. 20 pixels).
                bx, by, bw, bh, score = result[0, :5]

                # compute the initial misalignment of the second image. The second image
                # *should* be centered on the outer image, but it often is not.
                # So we need to use this initial offset when we insert the inner image
                # in order to not have visual jumps, but we have to gradually eliminate the
                # misalignment as we zoom out so that it is zero when switching to
                # the next image.
                ma_x = int(cx - bx)
                ma_y = int(cy - by)

                # Plausibility check. If the misalignment is too large something is wrong.
                # Usually the images are not in sequence or a zoom step is missing.
                if abs(ma_x) > w/5 or abs(ma_y) > h/5:
                    cv2.imshow("-haystack-", img_curr)
                    cv2.waitKey(0)

                    cv2.imshow("-needle-", img_next)
                    cv2.waitKey(0)

                    text = f'Error: Strong image misalignment found in zoom step {i} between these two images.\n' \
                           f'The offset vector is (dx={ma_x}, dy={ma_y}) which indicates an error in the sequence.\n' \
                           f'Images may not be in order, or there are multiple images for at least one zoom step.'
                    self.__show_error_images(imgCurr, imgNext, text)
                    raise Exception(f'Strong image misalignment found in step {i} (delta_x={ma_x}, delta_y={ma_y})! The images may not be in order, or the zoom factor is incorrect. Try using the "-as" option!')

                # How much do we need to compensate for in each step?
                ma_comp_x = ma_x / zoom_steps
                ma_comp_y = ma_y / zoom_steps

            # Add the smaller image into the larger one, but shift it to
            # compensate for the misalignment. The problem is that when it is
            # maximized it would be shifted off center. We need to fix that later.
            hs = hh//2 + ma_y
            ws = ww//2 + ma_x
            img_curr[cy-hs:cy-hs+hh, cx-ws:cx-ws+ww] = img_next

            # finally we have to gradually shift the resulting image back, because the
            # next frame should again be close to the center and the misalignment
            # compensation brought us away from it. So we gradually shift the combined
            # image back so that the center position remains in the center.
            ox = ma_comp_x * i
            oy = ma_comp_y * i

            print(f' - frame misalignment: zoom_step={i}; x_total={ma_x:.2f}; y_total={ma_y:.2f}; x_step={ma_x-ox:.2f}; y_step={ma_y-oy:.2f}', end='\r')

            if self.__param.debug_mode:
                # Draw a center cross for the outer image
                cv2.line(img_curr, (0, 0), (w, h), (0,0,255), thickness=1)
                cv2.line(img_curr, (0, h), (w, 0), (0,0,255), thickness=1)

                # Draw a rectangle around the actual image
                cv2.rectangle(img_curr, (cx-ws, cy-hs), (cx-ws+ww, cy-hs+hh), (0,255,0), 1)

                # Draw a center cross for the inner image
                cv2.line(img_curr, (cx-ws, cy-hs), (cx-ws+ww, cy-hs+hh), (0,255,0), thickness=1)
                cv2.line(img_curr, (cx-ws, cy-hs+hh), (cx-ws+ww, cy-hs), (0,255,0), thickness=1)


            mtx_shift = np.float32([[1, 0, ox], [0, 1, oy]])
            img_curr = cv2.warpAffine(img_curr, mtx_shift, (img_curr.shape[1], img_curr.shape[0]))

            if self.__param.debug_mode:
                xp = (w - video_w)//2
                yp = (h - video_h)//2

                cv2.putText(img_curr, f'rel_zoom={zoom_factor:.2f}', (xp+5, yp+20), self.__font, self.__fontScale, (0,0,255), self.__fontThickness, self.__fontLineType)
                cv2.putText(img_curr, f'size_inner={ww:.0f}x{hh:.0f}', (xp+5, yp+40), self.__font, self.__fontScale, (0,0,255), self.__fontThickness, self.__fontLineType)
                cv2.putText(img_curr, f'mis_align={ma_x},{ma_y}', (xp+5, yp+60), self.__font, self.__fontScale, (0,0,255), self.__fontThickness, self.__fontLineType)
                cv2.putText(img_curr, f'mis_align_res={ma_x-ox:.1f},{ma_y-oy:0.1f}', (xp+5, yp+80), self.__font, self.__fontScale, (0,0,255), self.__fontThickness, self.__fontLineType)

                # Draw a static image center marker
                cv2.line(img_curr, (cx, 0), (cx, h), (255, 0, 0), thickness=1)
                cv2.line(img_curr, (0, cy), (w, cy), (255, 0, 0), thickness=1)

            # final crop; there may be some inconsistencies at the boundaries
            img_curr = ih.crop_image(img_curr, (video_w, video_h))

            self.__frames.append(img_curr)

            img_display = cv2.resize(img_curr, None, fx=display_scale, fy=display_scale, interpolation=cv2.INTER_AREA)
            cv2.imshow("Frame generation progress...", img_display)
            key = cv2.waitKey(10)
            if key == 27 or cv2.getWindowProperty("Frame generation progress...", cv2.WND_PROP_VISIBLE) < 1:
                raise Exception("User aborted!")

        print()
--------------------------------------------------------------------------------