├── input
│   └── .gitkeep
├── requirements.txt
├── .gitattributes
├── .gitignore
├── exceptions
│   └── argument_exception.py
├── detectors
│   ├── detector_base.py
│   └── template_detector.py
├── processor
│   └── processor_base.py
├── LICENSE
├── .vscode
│   └── launch.json
├── README.md
├── infinite_zoom.py
├── helper
│   └── image_helper.py
└── infinite_zoom_impl.py
/input/.gitkeep:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | opencv-python
2 | screeninfo
3 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 |
2 | *.png
3 | *.mp4
4 | *.pyc
5 | .vs/*
6 | *.zip
7 | *.jpg
8 | *.webm
9 |
--------------------------------------------------------------------------------
/exceptions/argument_exception.py:
--------------------------------------------------------------------------------
1 | class ArgumentException(Exception):
2 | def __init__(self, msg : str):
3 | super().__init__(msg)
4 |
--------------------------------------------------------------------------------
/detectors/detector_base.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 |
3 |
4 | class DetectorBase(ABC):
5 | def __init__(self, name):
6 | self.__name = name
7 |
8 | @property
9 | def name(self):
10 | return self.__name
11 |
12 | @abstractmethod
13 | def search(self, file):
14 | pass
--------------------------------------------------------------------------------
/processor/processor_base.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | import numpy as np
3 |
4 |
5 | class ProcessorBase(ABC):
6 | def __init__(self, name):
7 | self._name = name
8 |
9 | @property
10 | def name(self):
11 | return self._name
12 |
13 | @abstractmethod
14 |     def process(self, image: np.ndarray) -> np.ndarray:
15 | pass
--------------------------------------------------------------------------------
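
A concrete processor might look like the following minimal sketch (for illustration only, not part of the repository; the class name is hypothetical):

```python
import numpy as np
import cv2

from processor.processor_base import ProcessorBase


class GrayscaleProcessor(ProcessorBase):
    """Example processor that converts a BGR image to grayscale."""

    def __init__(self):
        super().__init__("GrayscaleProcessor")

    def process(self, image: np.ndarray) -> np.ndarray:
        return cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
```
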
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 Ingo Berg
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/.vscode/launch.json:
--------------------------------------------------------------------------------
1 | {
2 | // Use IntelliSense to learn about possible attributes.
3 | // Hover to view descriptions of existing attributes.
4 | // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5 | "version": "0.2.0",
6 | "configurations": [
7 | {
8 | "name": "Python: Current File",
9 | "type": "python",
10 | "request": "launch",
11 | "program": "${file}",
12 | "console": "integratedTerminal",
13 | "justMyCode": true,
14 | "args": [
15 | "-zf", "2",
16 | "-zs", "100",
17 | "-zc", "0.8",
18 | "-d", "1.0",
19 | // "-i", "input/sample_1.33x",
20 | // "-i", "./input/sample_fairytale",
21 | // "-i", "./input/sample_dark",
22 | // "-i", "./input/sample_nostradamus",
23 | "-i", "./input/sample_nostradamus",
24 | // "-i", "./input/lexica",
25 | // "-i", "./input/sample_pirate",
26 | // "-i", "./input/sample2a",
27 | // "-i", "./input/sample_ps2",
28 | // "-as",
29 | "-rev",
30 | // "-dbg",
31 | // "-o", "output_frames/"]
32 | "-o", "myoutput.mp4"]
33 | }
34 | ]
35 | }
--------------------------------------------------------------------------------
/detectors/template_detector.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 |
4 | from detectors.detector_base import DetectorBase
5 |
6 |
7 | class TemplateDetector(DetectorBase):
8 | def __init__(self, threshold = 0.7, max_num = -100, method = cv2.TM_CCORR_NORMED):
9 | super(TemplateDetector, self).__init__("TemplateDetector")
10 |
11 | if method in [cv2.TM_CCOEFF, cv2.TM_CCORR, cv2.TM_SQDIFF]:
12 |             raise Exception("search requires a normalized algorithm!")
13 |
14 | self.__method = method
15 | self.__threshold = threshold
16 | self.__max_num = max_num
17 |
18 | @property
19 | def threshold(self):
20 | return self.__threshold
21 |
22 | @threshold.setter
23 | def threshold(self, value):
24 | self.__threshold = value
25 |
26 | @property
27 | def pattern(self):
28 | return self.__pattern
29 |
30 | @pattern.setter
31 | def pattern(self, pat):
32 | self.__pattern = pat
33 | self.__height, self.__width = self.__pattern.shape[:2]
34 |
35 | @property
36 | def max_num(self):
37 | return self.__max_num
38 |
39 | @max_num.setter
40 | def max_num(self, value):
41 | self.__max_num = value
42 |
43 | def load(self, file):
44 | self.__pattern = cv2.imread(file)
45 | self.__height, self.__width = self.__pattern.shape[:2]
46 |
47 |
48 |     def search(self, image: np.ndarray, threshold: float = None):
49 | if image is None:
50 | raise Exception('Image is null!')
51 |
52 | if self.__method in [cv2.TM_CCOEFF, cv2.TM_CCORR, cv2.TM_SQDIFF]:
53 |             raise Exception("search requires a normalized algorithm!")
54 |
55 |         num_dims = len(image.shape)
56 |         if num_dims == 2:
57 | self.__pattern = cv2.cvtColor(self.__pattern, cv2.COLOR_BGR2GRAY)
58 |
59 | if image.dtype.name == 'float32' and self.__pattern.dtype.name == 'uint8':
60 | self.__pattern = np.float32(self.__pattern)
61 | self.__pattern = self.__pattern / 255.0
62 |
63 | res = cv2.matchTemplate(image, self.__pattern, self.__method)
64 | if self.__method in [cv2.TM_SQDIFF, cv2.TM_SQDIFF_NORMED]:
65 | res = 1 - res
66 |
67 | result_copy = res.copy()
68 |
69 | if threshold is None:
70 | threshold = self.__threshold
71 |
72 | img_height, img_width = image.shape[:2]
73 |
74 | max_val = 1
75 | rects = []
76 |
77 | ct = 0
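        # Greedy peak picking: repeatedly take the global maximum of the correlation
        # map and zero out a pattern-sized region around it so that the next
        # iteration finds the next-best, non-overlapping match.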
78 | while max_val > threshold:
79 | min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
80 | ct += 1
81 |
82 | if ct > self.__max_num:
83 | break
84 |
85 | x, y = max_loc
86 | if max_val > threshold:
87 | h1 = np.clip(max_loc[1] - self.__height//2, 0, img_height)
88 | h2 = np.clip(max_loc[1] + self.__height//2 + 1, 0, img_height)
89 |
90 | w1 = np.clip(max_loc[0] - self.__width//2, 0, img_width)
91 | w2 = np.clip(max_loc[0] + self.__width//2 + 1, 0, img_width)
92 | res[h1:h2, w1:w2] = 0
93 |
94 |                 # note: The result map is smaller than the image by the size of the pattern,
95 |                 # therefore half the pattern size needs to be added.
96 | rects.append((int(x + self.__width//2), int(y + self.__height//2), self.__width, self.__height, max_val, 0))
97 |
98 | return np.array(rects), result_copy
--------------------------------------------------------------------------------
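
Typical use of the detector looks like this (an illustrative sketch, not part of the repository; the file names are hypothetical):

```python
import cv2
from detectors.template_detector import TemplateDetector

# find up to five non-overlapping matches of a pattern inside a scene image
detector = TemplateDetector(threshold=0.7, max_num=5, method=cv2.TM_CCOEFF_NORMED)
detector.load("pattern.png")     # hypothetical input files
scene = cv2.imread("scene.png")

rects, correlation_map = detector.search(scene)
for x, y, w, h, score, _ in rects:
    # x, y is the match center; half the pattern size has already been added
    print(f"match at ({int(x)}, {int(y)}) with score {score:.2f}")
```
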
/README.md:
--------------------------------------------------------------------------------
1 | # Creating an "Infinite Zoom" from AI-Outpainted images
2 | The Python command line script published here can turn a series of AI-generated images into a zoom animation. For more details have a look at my web page where I explain the inner workings in detail:
3 |
4 | * In German: https://beltoforion.de/de/infinite_zoom
5 | * In English: https://beltoforion.de/en/infinite_zoom
6 |
7 | Here is an example video created by the script:
8 |
9 | https://github.com/beltoforion/ai_ever_zoom/assets/2202567/78bcbe99-8dbb-48d7-88bf-f8f400ed10c9
10 |
11 | ## What is AI Outpainting?
12 | Outpainting is a technique where you zoom out of an image by a certain factor while letting a generative AI
13 | fill in the newly created empty edge. By giving the AI new prompts, you can control the evolution of the scene
14 | as you zoom out.
15 |
16 | 
17 |
18 | AI outpainting requires the use of a generative AI for images and can be done with Midjourney, Dall-E or Photoshop
19 | (whose generative AI is currently only in beta). I have only tested this command line script on Midjourney images as they are
20 | the easiest to create. In principle, this program will work with any outpainted image set.
21 |
22 | ## Preparing the images
23 | Before you start you need a set of outpainted AI images. Copy this set into a separate folder inside the "input" folder. It is best
24 | to order the images in the folder by giving them sequential names (e.g. "frame_01.png", "frame_02.png", ..., "frame_10.png").
25 |
26 | Alternatively you can use the "-as" option to let the script determine the image order for you, but this will take some time as
27 | each image is matched against all other images to figure out their relations automatically. A simplified sketch of the idea is shown below.
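
Conceptually, the auto sort computes a match score for every ordered pair of images by searching a downscaled copy of one image inside the other, then chains the mutual best matches into a sequence. A simplified sketch of the idea (illustrative only, not the actual implementation):

```python
import cv2
import numpy as np

def pairwise_scores(images: list, zoom_factor: float) -> np.ndarray:
    """Score how well a downscaled copy of image j matches inside image i."""
    n = len(images)
    scores = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            if i == j:
                continue
            h, w = images[j].shape[:2]
            # shrink image j by the series zoom factor and correlate it with image i
            small = cv2.resize(images[j], (int(w / zoom_factor), int(h / zoom_factor)))
            scores[i, j] = cv2.matchTemplate(images[i], small, cv2.TM_CCOEFF_NORMED).max()
    return scores
```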
28 |
29 | * Create a set of outpainted images with the generative AI of your choice.
30 | + The first image is the innermost image of the series.
31 |   + The image series must be zoomed with respect to the center.
32 |   + The entire image series must use the same zoom factor (e.g. 2x).
33 | * Rename and order the image sequence by giving them sequential names (e.g. "frame_01.png", "frame_02.png").
34 |
35 | ## Usage
36 |
37 | You need Python to execute this script. Put your input images into a folder and then run the script on the contents of this folder.
38 |
39 | ```bash
40 | python ./infinite_zoom.py -zf 2 -zs 100 -zc 0.8 -i ./samples_ps -o video.mp4
41 | ```
42 | Here is an example that dumps the frames without creating a video file:
43 |
44 | ```bash
45 | python3 ./infinite_zoom.py -as -i ./sample_fairytale -o myframes/
46 | ```
47 |
48 |
49 | ## Command Line Options
50 |
51 | -zf
        Zoom factor used for creating the outpainted image sequence. For image sequences created by Midjourney use either "2" or "1.333". (Midjourney incorrectly states that its low zoom level is 1.5 but it is actually just 1.333.) For an image series created with Lexica use 1.5. If you are unsure, compute the zoom factor yourself by using two successive images of the series and matching them manually in the image processing software of your choice (see the worked example below the option list). If the zoom factor is incorrect this script cannot work properly!
52 |
53 | -zs
Number of zoom steps for each image
54 |
55 | -zc
        Crop zoomed images by this factor. Midjourney takes some liberties in modifying the edge regions between zoom steps. They may not match perfectly.
56 |
57 | -i
Path to folder with input images.
58 |
59 | -o
Name of the output folder or file. Must either be a valid file name with an mp4 extension or a folder name. If no extension is given it is assumed to be a folder name and the output will consist of the frame dump instead of a single video file.
60 |
61 | -as
Automatically sort input images. If you use this option you can name the images arbitrarily. The script will figure out the right order.
62 |
63 | -dbg
Show debug overlays and display debug output for each auto sort step.
64 |
65 | -rev
Reverse the video. This will create a zoom out effect.
66 |
67 | -fps
Set the target framerate of the output video.
68 |
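Worked example for determining the zoom factor (an illustrative calculation with assumed numbers): open two successive images in an image editor and scale the earlier (inner) image down until it aligns with the central region of the later one. If a 1024x1024 pixel image has to be scaled down to 768x768 pixels to match, the zoom factor is 1024/768 ≈ 1.333. If it has to be scaled down to 512x512 pixels, the factor is 1024/512 = 2.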
--------------------------------------------------------------------------------
/infinite_zoom.py:
--------------------------------------------------------------------------------
1 | import infinite_zoom_impl as izoom
2 | import argparse
3 | import pathlib
4 |
5 |
6 | from pathlib import Path
7 |
8 | def valid_crop_range(x):
9 | try:
10 | x = float(x)
11 | except ValueError:
12 | raise argparse.ArgumentTypeError(f'{x} is not a floating-point literal')
13 |
14 | if x < 0.1 or x > 0.95:
15 | raise argparse.ArgumentTypeError(f'{x:0.2f} not in range [0.1, 0.95]')
16 |
17 | return x
18 |
19 |
20 | def main():
21 | parser = argparse.ArgumentParser(description='AI Outpainting Zoom Generator - Turn an AI generated image series into an animation')
22 | parser.add_argument('-zf', '--ZoomFactor', dest='zoom_factor', help='The outpainting zoom factor set up when creating the image sequence.', required=False, type=float, default=2)
23 | parser.add_argument('-zs', '--ZoomSteps', dest='zoom_steps', help='The number of zoom steps to be generated between two successive images.', required=False, type=int, default=100)
24 |     parser.add_argument('-zc', '--ZoomCrop', dest='zoom_crop', help='Set the crop factor applied to the follow-up image of each zoom step. This is helpful to hide image variations at the edges.', required=False, type=valid_crop_range, default=0.8)
25 | parser.add_argument('-o', '--Output', dest='output', help='Name of output file or folder. If this is a folder name the output will consist of the frames. If it is not a folder name it is assumed to be the name of the mp4 output file.', required=False, type=str, default='output.mp4')
26 |     parser.add_argument('-i', '--Input', dest='input_folder', help='Path to the folder containing input images.', required=True, type=str)
27 | parser.add_argument('-as', '--AutoSort', dest='auto_sort', help='Input images are unsorted, automatically sort them.', required=False, action='store_true', default=False)
28 | parser.add_argument('-dbg', '--Debug', dest='debug', help='Enable debug aides', required=False, action='store_true', default=False)
29 | parser.add_argument('-d', '--Delay', dest='delay', help='Start/Stop delay in seconds', required=False, type=float, default=0.0)
30 | parser.add_argument('-rev', '--Reverse', dest='reverse', help='Reverse the output video.', required=False, action='store_true', default=False)
31 | parser.add_argument('-fps', '--FramesPerSecond', dest='fps', help='Frames per second in the output video.', required=False, type=float, default=60.0)
32 |
33 | args = parser.parse_args()
34 |
35 | print('\r\n')
36 | print('AI Outpainting Zoom Video Generator')
37 | print('-----------------------------------')
38 | print(f' - input folder: "{args.input_folder}"')
39 | print(f' - output: "{args.output}"')
40 | print(f' - fps: {args.fps}')
41 | print(f' - zoom factor: {args.zoom_factor}')
42 | print(f' - zoom steps: {args.zoom_steps}')
43 | print(f' - zoom crop: {args.zoom_crop}')
44 | print(f' - auto sort: {args.auto_sort}')
45 | print(f' - debug aides: {args.debug}')
46 | print(f' - delay: {args.delay}')
47 | print(f' - reverse: {args.reverse}')
48 |
49 | param = izoom.InfiniZoomParameter()
50 | param.reverse = args.reverse
51 | param.auto_sort = args.auto_sort
52 | param.debug_mode = args.debug
53 | param.zoom_image_crop = args.zoom_crop
54 | param.zoom_steps = args.zoom_steps
55 | param.zoom_factor = args.zoom_factor # The zoom factor used by midjourney
56 | param.input_path = Path(args.input_folder)
57 | param.delay = args.delay
58 | param.fps = args.fps
59 |
60 | # when no extension is present we assume it is an output folder. In this
61 | # case no video is created and all frames are saved into the output folder
62 | output_ext = pathlib.Path(args.output).suffix
63 | if output_ext == '':
64 | if not pathlib.Path(args.output).is_dir():
65 | print(f"\nCreating frame output folder: {args.output}")
66 | pathlib.Path(args.output).mkdir(parents=True, exist_ok=True)
67 |
68 | param.output_frames = True
69 | param.output_file = None
70 | param.output_folder = args.output
71 | else:
72 | param.output_frames = False
73 | param.output_file = args.output # name of the output video file
74 | param.output_folder = None
75 |
76 | iz = izoom.InfiniZoom(param)
77 | iz.process()
78 |
79 | if __name__ == "__main__":
80 | main()
--------------------------------------------------------------------------------
/helper/image_helper.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pathlib
3 | import cv2
4 | from typing import Union
5 |
6 | from processor.processor_base import *
7 |
8 |
9 | def crop_image(image, crop_size):
10 | """ Crop an image.
11 |
12 | Crop is done centric.
13 | """
14 | h, w = image.shape[:2]
15 | cx = w //2
16 | cy = h //2
17 |
18 | start_x = cx - crop_size[0]//2
19 | start_y = cy - crop_size[1]//2
20 |
21 | end_x = start_x + crop_size[0]
22 | end_y = start_y + crop_size[1]
23 |
24 | cropped_image = image[start_y:end_y, start_x:end_x]
25 |
26 | return cropped_image
27 |
28 |
29 | def draw_cross(image, center, size, color=(0, 255, 0), thickness=2):
30 | """
31 | Draw a cross on the given image at the specified center coordinates with the given size, color, and thickness.
32 |
33 | Parameters:
34 | image (numpy.ndarray): The image on which to draw the cross.
35 | center (tuple): The center coordinates of the cross in (x, y) format.
36 | size (int): The size of the cross.
37 | color (tuple): The color of the cross in BGR format. Default is green (0, 255, 0).
38 | thickness (int): The thickness of the cross lines. Default is 2.
39 | """
40 | x, y = center
41 | half_size = size // 2
42 |
43 | cv2.line(image, (x - half_size, y), (x + half_size, y), color, thickness)
44 | cv2.line(image, (x, y - half_size), (x, y + half_size), color, thickness)
45 |
46 |
47 | def read_image(file : str, processor : Union[ProcessorBase, list] = None):
48 |     """ Read an image in raw or jpeg format.
49 |     The image is optionally preprocessed with a single processor or a list of processors.
50 |     """
51 |
52 | ext = pathlib.Path(file).suffix
53 |     image : np.ndarray = cv2.imread(file)
    if image is None:
        # cv2.imread returns None if the file could not be read
        return None, None
54 |
55 |     original_image = image.copy()
56 |
57 | if type(processor) is list:
58 | for p in processor:
59 | image = p.process(image)
60 | elif isinstance(processor, ProcessorBase):
61 | image = processor.process(image)
62 | elif processor is None:
63 | pass
64 |
65 | return image, original_image
66 |
67 |
68 | def read_images_folder(path : pathlib.Path):
69 | pathlist = sorted(path.glob('*.*'))
70 |
71 | images = []
72 |
73 | for path in pathlist:
74 | if not path.suffix.lower() in ['.png', '.jpg', '.jpeg']:
75 | continue
76 |
77 | img_orig, _ = read_image(str(path), None)
78 | if img_orig is not None:
79 | images.append(img_orig)
80 |
81 | if len(images)>0:
82 | first_image_shape = images[0].shape
83 | if not all(image.shape == first_image_shape for image in images):
84 | raise Exception(f"Reading images failed because not all images in the folder have equal size! Expected image size is {first_image_shape[:2]}.")
85 |
86 | return np.array(images)
87 |
88 |
89 | def create_radial_mask(h, w, inner_radius_fraction=0.4, outer_radius_fraction=1.0):
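    """Create a soft circular alpha mask: 1.0 inside the inner radius, 0.0 outside
    the outer radius, with a linear falloff in the ring between them."""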
90 | center = (int(w / 2), int(h / 2))
91 | inner_radius = int(min(center[0], center[1]) * inner_radius_fraction)
92 | outer_radius = int(min(center[0], center[1]) * outer_radius_fraction)
93 |
94 | Y, X = np.ogrid[:h, :w]
95 | dist_from_center = np.sqrt((X - center[0]) ** 2 + (Y - center[1]) ** 2)
96 |
97 | mask = np.zeros((h, w))
98 | mask[dist_from_center <= inner_radius] = 1.0
99 | mask[dist_from_center >= outer_radius] = 0.0
100 |
101 | transition = np.logical_and(dist_from_center > inner_radius, dist_from_center < outer_radius)
102 | mask[transition] = (outer_radius - dist_from_center[transition]) / (outer_radius - inner_radius)
103 |
104 | return mask
105 |
106 | def overlay_images(background, foreground, position, relative_to='corner', opacity=1):
107 | # Get the dimensions of the foreground image
108 | fh, fw, _ = foreground.shape
109 |
110 | # Create an alpha mask of the same size as the foreground image
111 | mask = create_radial_mask(fh, fw, 0.4, 1 + opacity)
112 |
113 | # Convert foreground to float and normalize
114 | foreground = foreground.astype(float) / 255
115 |
116 | # Create a 4-channel image (RGB + alpha) for the foreground
117 | foreground_alpha = np.dstack([foreground, mask])
118 |
119 | # Get the position
120 | x, y = position
121 |
122 | # If position is relative to the center, adjust the position
123 | if relative_to == 'center':
124 | y = background.shape[0]//2 - fh//2 + y
125 | x = background.shape[1]//2 - fw//2 + x
126 |
127 | # Calculate the overlay region
128 | overlay_x_start = max(x, 0)
129 | overlay_y_start = max(y, 0)
130 | overlay_x_end = min(x+fw, background.shape[1])
131 | overlay_y_end = min(y+fh, background.shape[0])
132 |
133 | # Calculate the region of the foreground to be overlayed
134 | foreground_x_start = max(0, -x)
135 | foreground_y_start = max(0, -y)
136 | foreground_x_end = min(fw, overlay_x_end - x)
137 | foreground_y_end = min(fh, overlay_y_end - y)
138 |
139 | # Prepare the overlay with the correct opacity
140 | foreground_region = foreground_alpha[foreground_y_start:foreground_y_end, foreground_x_start:foreground_x_end]
141 | background_region = background[overlay_y_start:overlay_y_end, overlay_x_start:overlay_x_end] / 255
142 |
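    # standard alpha compositing: out = fg * a + bg * (1 - a), with a = mask * opacity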
143 | overlay = (foreground_region[..., :3] * foreground_region[..., 3:4] * opacity +
144 | background_region * (1 - foreground_region[..., 3:4] * opacity)) * 255
145 |
146 | # Overlay the appropriately sized and positioned region of the foreground onto the background
147 | background[overlay_y_start:overlay_y_end, overlay_x_start:overlay_x_end] = overlay.astype(np.uint8)
148 |
149 | return background
150 |
--------------------------------------------------------------------------------
/infinite_zoom_impl.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | from exceptions.argument_exception import ArgumentException
3 | from detectors.template_detector import *
4 |
5 | import helper.image_helper as ih
6 | import cv2
7 | import screeninfo
8 | import math
9 | import time
10 |
11 |
12 | class InfiniZoomParameter:
13 | def __init__(self):
14 | self.__zoom_steps = 100
15 | self.__reverse = False
16 | self.__auto_sort = False
17 | self.__zoom_image_crop = 0.8
18 | self.__zoom_factor = 2
19 | self.__debug_mode = False
20 | self.__fps = 60
21 | self.__output_frames = False
22 | self.__output_folder = ''
23 | self.__output_file = ''
24 |
25 | @property
26 | def output_frames(self):
27 | return self.__output_frames
28 |
29 | @output_frames.setter
30 | def output_frames(self, value: float):
31 | self.__output_frames = value
32 |
33 | @property
34 | def fps(self):
35 | return self.__fps
36 |
37 | @fps.setter
38 | def fps(self, value: float):
39 | self.__fps = value
40 |
41 | @property
42 | def delay(self):
43 | return self.__delay
44 |
45 | @delay.setter
46 | def delay(self, value: float):
47 | self.__delay = value
48 |
49 | @property
50 | def debug_mode(self):
51 | return self.__debug_mode
52 |
53 | @debug_mode.setter
54 | def debug_mode(self, state: bool):
55 | self.__debug_mode = state
56 |
57 | @property
58 | def zoom_factor(self):
59 | return self.__zoom_factor
60 |
61 | @zoom_factor.setter
62 | def zoom_factor(self, f: float):
63 | self.__zoom_factor = f
64 |
65 | @property
66 | def zoom_image_crop(self):
67 | return self.__zoom_image_crop
68 |
69 | @zoom_image_crop.setter
70 | def zoom_image_crop(self, crop: float):
71 | self.__zoom_image_crop = crop
72 |
73 | @property
74 | def reverse(self):
75 | return self.__reverse
76 |
77 | @reverse.setter
78 | def reverse(self, stat: bool):
79 | self.__reverse = stat
80 |
81 | @property
82 | def auto_sort(self):
83 | return self.__auto_sort
84 |
85 | @auto_sort.setter
86 | def auto_sort(self, stat: bool):
87 | self.__auto_sort = stat
88 |
89 | @property
90 | def zoom_steps(self):
91 | return self.__zoom_steps
92 |
93 | @zoom_steps.setter
94 | def zoom_steps(self, steps: int):
95 | if steps<1:
96 | raise ArgumentException("Range error: steps must be greater than 0")
97 |
98 | self.__zoom_steps = steps
99 |
100 | @property
101 | def input_path(self):
102 | return self.__input_path
103 |
104 | @input_path.setter
105 | def input_path(self, path : Path):
106 | self.__input_path = path
107 |
108 | @property
109 | def output_file(self):
110 | return self.__output_file
111 |
112 | @output_file.setter
113 | def output_file(self, file):
114 | self.__output_file = file
115 |
116 | @property
117 | def output_folder(self):
118 | return self.__output_folder
119 |
120 | @output_folder.setter
121 | def output_folder(self, folder):
122 | self.__output_folder = folder
123 |
124 |
125 | class InfiniZoom:
126 | def __init__(self, param : InfiniZoomParameter):
127 | self.__param = param
128 | self.__image_list = []
129 | self.__video_writer = None
130 | self.__frames = []
131 |
132 | self.__font = cv2.FONT_HERSHEY_DUPLEX
133 | self.__fontScale = 0.6
134 | self.__fontThickness = 1
135 | self.__fontLineType = 1
136 |
137 | # get screen resolution
138 | screen = screeninfo.get_monitors()[0]
139 | self.__screen_width = screen.width
140 | self.__screen_height = screen.height
141 |
142 |
143 | def __load_images(self):
144 | if not self.__param.input_path.exists():
145 | raise Exception("input path does not exist")
146 |
147 | print(f'\nReading images from "{str(self.__param.input_path)}"')
148 | self.__image_list = ih.read_images_folder(self.__param.input_path)
149 | print(f' - {len(self.__image_list)} images read\n')
150 |
151 |
152 | def __print_matrix(self, matrix):
153 | rows, cols = matrix.shape
154 | for i in range(rows):
155 | for j in range(cols):
156 | if matrix[i, j]==0:
157 | print(' -- ', end=" ") # Print element followed by a tab
158 | else:
159 | print(f'{matrix[i, j]:.2f}', end=" ") # Print element followed by a tab
160 |
161 | print()
162 |
163 |
164 | def __auto_sort(self):
165 | print(f'Determining image order')
166 |
167 | # ibg 2023-08-19: #1
168 | # changed method to TM_CCOEFF_NORMED from TM_CCORR_NORMED because the latter
169 |         # one failed with some images. Well, it did not really fail, but it found a
170 | # false match to the first image in the series with a score of 0.92 (all other
171 | # matches had a clean 1.0).
172 | detector = TemplateDetector(threshold=0.01, max_num=1, method = cv2.TM_CCOEFF_NORMED)
173 |
174 | num = len(self.__image_list)
175 | scores = np.zeros((num, num))
176 |
177 | prog = 0
178 | debug_frames = None
179 |
180 | for i in range(0, num):
181 | max_score = 0
182 | best_match = None
183 |
184 | for j in range(0, num):
185 | if i==j:
186 | continue
187 |
188 | prog += 1
189 | print(f' - matching images {100*prog/(num*num-num):.0f} % ', end='\r')
190 |
191 | img1 = self.__image_list[i].copy()
192 | img2 = self.__image_list[j].copy()
193 | if img1.shape != img2.shape:
194 | raise Exception("Auto sort failed: Inconsistent image sizes!")
195 |
196 | h, w = img1.shape[:2]
197 |
198 | mtx_scale = cv2.getRotationMatrix2D((0, 0), 0, 1/self.__param.zoom_factor)
199 | img2 = cv2.warpAffine(img2, mtx_scale, (int(w*1/self.__param.zoom_factor), int(h*1/self.__param.zoom_factor)))
200 |
201 | detector.pattern = img2
202 | result, result_img = detector.search(img1)
203 |
204 | if len(result)==0:
205 |                     print(f'Correlating image {i} with image {j}: Cannot find any related image. The series zoom factor is incorrect or you have unrelated images in the input folder!')
206 | continue
207 | else:
208 | bx, by, bw, bh, score = result[0, :5]
209 |
210 | if score > max_score:
211 | max_score = score
212 | best_match = self.__image_list[j].copy()
213 |
214 | if self.__param.debug_mode:
215 | # convert result_img to 8 bit
216 | result_img = np.clip(result_img * 255, 0, 255).astype(np.uint8)
217 |
218 | # copy correlation image centered into an image with the same size
219 | # as the original
220 | corr_result = np.zeros(img1.shape, np.uint8)
221 | rh, rw = result_img.shape[0:2]
222 | corr_result[rh//2:(rh//2)+rh, rw//2:(rw//2)+rw, :] = result_img[..., np.newaxis]
223 |
224 | overview_image = np.zeros((h*2, w*2, 3), np.uint8)
225 | overview_image[0:h, 0:w, :] = img1
226 | overview_image[0:h, w:w+best_match.shape[1], :] = best_match
227 |
228 | overview_image[h:h+img2.shape[0], 0:img2.shape[1], :] = img2
229 | overview_image[h:h+corr_result.shape[0], w:w+corr_result.shape[1], :] = corr_result
230 |
231 | max_width = 1200
232 | scale = max_width / overview_image.shape[1]
233 | new_width = int(overview_image.shape[1] * scale)
234 | new_height = int(overview_image.shape[0] * scale)
235 |
236 | overview_image = cv2.resize(overview_image, (new_width, new_height))
237 | ho, wo = overview_image.shape[:2]
238 | cv2.putText(overview_image, f'series image {i} of {num}; progress is {100*prog/(num*num-num):.0f} %', (20, 20), self.__font, self.__fontScale, (0,255,0), self.__fontThickness, self.__fontLineType)
239 | cv2.putText(overview_image, f'best match so far; score={max_score:.2f}', (wo//2 + 20, 20), self.__font, self.__fontScale, (0,255,0), self.__fontThickness, self.__fontLineType)
240 | cv2.putText(overview_image, f'normalized cross correlation', (wo//2 + 20, ho//2+20), self.__font, self.__fontScale, (0,255,0), self.__fontThickness, self.__fontLineType)
241 | cv2.putText(overview_image, f'candidate {j}', (20, ho//2+20), self.__font, self.__fontScale, (0,255,0), self.__fontThickness, self.__fontLineType)
242 |
243 |                     if debug_frames is not None:
244 | debug_frames.append(overview_image.copy())
245 |
246 | cv2.imshow("Finding image order", self.__downscale_to_screen(overview_image, 1920, 1080))
247 | cv2.waitKey(10)
248 |
249 | scores[i, j] = score
250 |
251 | cv2.waitKey()
252 |
253 |             if debug_frames is not None:
254 | time.sleep(1)
255 |
256 | for i in range(0,20):
257 | debug_frames.append(overview_image.copy())
258 |
259 | # process the data to find the best matches for each image
260 | self.__image_list = self.__filter_array(scores)
261 | cv2.destroyAllWindows()
262 |
263 |         if debug_frames is not None:
264 | vh, vw = overview_image.shape[:2]
265 | self.__video_writer = cv2.VideoWriter("debug.mp4", cv2.VideoWriter_fourcc(*'mp4v'), 10, (vw, vh))
266 |
267 | for frame in debug_frames:
268 | self.__video_writer.write(frame)
269 |
270 | self.__video_writer.release()
271 |
272 |
273 | def __downscale_to_screen(self, img, screen_width, screen_height):
274 | """
275 | Downscale an image so that it fits the screen dimensions while maintaining its aspect ratio.
276 |
277 | Args:
278 | - img (numpy.ndarray): The input image.
279 | - screen_width (int): The width of the screen.
280 | - screen_height (int): The height of the screen.
281 |
282 | Returns:
283 | - numpy.ndarray: The downscaled image.
284 | """
285 | # Obtain the width and height of the image
286 | img_height, img_width = img.shape[:2]
287 |
288 | # Determine the aspect ratio of the image
289 | aspect_ratio = img_width / img_height
290 |
291 | # Calculate the dimensions if we were to fit by width
292 | new_width_by_w = screen_width
293 | new_height_by_w = int(screen_width / aspect_ratio)
294 |
295 | # Calculate the dimensions if we were to fit by height
296 | new_width_by_h = int(screen_height * aspect_ratio)
297 | new_height_by_h = screen_height
298 |
299 | # Choose the dimensions that fit within the screen
300 | if new_width_by_w <= screen_width and new_height_by_w <= screen_height:
301 | new_width, new_height = new_width_by_w, new_height_by_w
302 | else:
303 | new_width, new_height = new_width_by_h, new_height_by_h
304 |
305 | # Resize the image
306 | resized_img = cv2.resize(img, (new_width, new_height))
307 |
308 | return resized_img
309 |
310 |
311 | def __filter_array(self, arr):
312 | filtered = np.zeros(arr.shape)
313 |
314 | # Get the indices of the row and column maxima
315 | row_max_indices = np.argmax(arr, axis=1)
316 | col_max_indices = np.argmax(arr, axis=0)
317 |
318 | # We need to find the first image of the series now. To do this we must check the
319 | # images that could not be matched as a follow up image to any of the images.
320 | # This could be images that were added accidentally but the first image is also unlinked!
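        # An entry of the score matrix survives only if it is a mutual best match:
        # column c is the maximum of row r and row r is the maximum of column c.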
321 | unlinked_images = []
322 | for r in range(arr.shape[0]):
323 | # index of the maximum value of row r
324 | col_max = row_max_indices[r]
325 |
326 | # is this also the column maximum?
327 | if col_max_indices[col_max]==r:
328 | filtered[r, col_max] = arr[r, col_max]
329 | else:
330 | unlinked_images.append(r)
331 |
332 | # Print the image connection matrix
333 | print(f'\n\nImage relation matrix:')
334 | self.__print_matrix(filtered)
335 |
336 | # Now eliminate all unlinked images and find the first one:
337 | num_unlinked = len(unlinked_images)
338 | # print(f' - found {num_unlinked} unlinked images.')
339 | if num_unlinked == 0:
340 |             print(f' - Warning: Cannot identify the first frame! This means that every image in the sequence is a good follow-up match to some other image in the same series.')
341 |
342 | if num_unlinked > 1:
343 | print(f' - Warning: Your series contains {num_unlinked-1} images that cannot be matched!')
344 |
345 | print('\nFinding first image:')
346 | start_candidates = []
347 | for i in range(0, len(unlinked_images)):
348 | idx = unlinked_images[i]
349 | col = filtered[:, idx]
350 | if np.any(col != 0):
351 | print(f' - Image {idx} is the start of a zoom series')
352 | start_candidates.append(idx)
353 | else:
354 | print(f' - Discarding image {idx} because it is unconnected to other images!')
355 |
356 | if len(start_candidates)==0:
357 | raise Exception("Aborting: Could not find start image!")
358 |
359 | if len(start_candidates)>1:
360 |             raise Exception(f'Check the zoom factor! If you are sure the zoom factor is correct, clean up the image series! I found {len(start_candidates)} different images that could be the starting image. This can happen if the zoom factor is off or if the series contains multiple images for the same zoom step.')
361 |
362 | # finally build sorted image list
363 |         sequence_order = self.__assemble_image_sequence(start_candidates[0], filtered)
364 | print(f' - Image sequence is {",".join(map(str, sequence_order))}')
365 |
366 | sorted_image_list = []
367 | for idx in sequence_order:
368 | sorted_image_list.append(self.__image_list[idx])
369 |
370 | return sorted_image_list
371 |
372 |
373 | def __assemble_image_sequence(self, start : int, conn_matrix):
374 | series = []
375 |
376 | next_image_index = start
377 | series.insert(0, next_image_index)
378 |
379 | non_zeros = np.nonzero(conn_matrix[:, next_image_index])[0]
380 | while len(non_zeros)>0:
381 | next_image_index = non_zeros[0]
382 | series.insert(0, next_image_index)
383 |
384 | non_zeros = np.nonzero(conn_matrix[:, next_image_index])[0]
385 |
386 | return series
387 |
388 |
389 | def process(self):
390 | self.__load_images()
391 |
392 | if len(self.__image_list)==0:
393 | raise Exception("Processing failed: Image list is empty!")
394 |
395 | if self.__param.auto_sort:
396 | self.__auto_sort()
397 |
398 | h, w = self.__image_list[0].shape[:2]
399 |
400 | video_w = int(w * self.__param.zoom_image_crop)
401 | video_h = int(h * self.__param.zoom_image_crop)
402 |
403 | self.__frames = []
404 |
405 | print(f'Generating Zoom Sequence')
406 | for i in range(len(self.__image_list)-1):
407 | img1 = self.__image_list[i]
408 | img2 = self.__image_list[i+1]
409 |
410 | self.zoom_in(img1, img2, video_w, video_h)
411 |
412 | cv2.destroyAllWindows()
413 |
414 | if self.__param.output_frames:
415 | self.__save_frames()
416 | else:
417 | self.__create_video(video_w, video_h)
418 |
419 | print(f'\nDone\n')
420 |
421 |
422 | def __save_frames(self):
423 | print(f'Saving frames to output folder {self.__param.output_folder} ')
424 |
425 | if self.__param.reverse:
426 | frames = reversed(self.__frames)
427 | else:
428 | frames = self.__frames
429 |
430 | ct = 0
431 | for frame in frames:
432 | print(f' - Saving frame {ct}', end='\r')
433 | cv2.imwrite(f'{self.__param.output_folder}/frame_{ct:05d}.png', frame)
434 | ct += 1
435 |
436 |
437 | def __create_video(self, video_w, video_h):
438 | print(f'Creating output file {self.__param.output_file} ')
439 | self.__video_writer = cv2.VideoWriter(self.__param.output_file, cv2.VideoWriter_fourcc(*'mp4v'), self.__param.fps, (video_w, video_h))
440 |
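        # the first and the last frame are repeated to create the start/stop delay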
441 | num_stills = int(self.__param.delay * self.__param.fps)
442 |
443 | if self.__param.reverse:
444 | frames = reversed(self.__frames)
445 | else:
446 | frames = self.__frames
447 |
448 | ct = 0
449 | for frame in frames:
450 | if ct==0 or ct==len(self.__frames)-1:
451 | for i in range(num_stills):
452 | self.__video_writer.write(frame)
453 | else:
454 | self.__video_writer.write(frame)
455 |
456 | ct += 1
457 |
458 | self.__video_writer.release()
459 |
460 |
461 | def __show_error_images(self, img_curr, img_next, text):
462 | combined_image = cv2.hconcat([img_curr, img_next])
463 |
464 | h, w = combined_image.shape[:2]
465 | scale_factor = min(self.__screen_width / w, self.__screen_height / h)
466 | scale_factor = min(1, scale_factor)
467 |
468 | new_width = int(combined_image.shape[1] * scale_factor)
469 | new_height = int(combined_image.shape[0] * scale_factor)
470 | combined_image = cv2.resize(combined_image, (new_width, new_height))
471 |
472 |
473 | for i, line in enumerate(text.split('\n')):
474 | cv2.putText(
475 | combined_image,
476 | line,
477 | (20, 20 + i*20),
478 | self.__font,
479 | self.__fontScale,
480 | (0,255,0),
481 | self.__fontThickness,
482 | self.__fontLineType)
483 |
484 | cv2.imshow("Image misalignment error", combined_image)
485 | cv2.waitKey(0)
486 |
487 |
488 | def zoom_in(self, imgCurr, imgNext, video_w, video_h):
489 | zoom_steps = self.__param.zoom_steps
490 |
491 |         # imgNext contains the next zoom level; scaled down by the series zoom factor it fits the center of imgCurr
492 | h, w = imgCurr.shape[:2]
493 | cx = w // 2
494 | cy = h // 2
495 |
496 | # compute step size for each partial image zoom. Zooming is an exponential
497 | # process, so we need to compute the steps on a logarithmic scale.
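        # With f = zoom_factor**(1/zoom_steps), applying f once per step gives
        # f**zoom_steps == zoom_factor over the complete transition.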
498 | f = math.exp(math.log(self.__param.zoom_factor)/zoom_steps)
499 |
500 | # copy images because we will modify them
501 | img_curr = imgCurr.copy()
502 | img_next = imgNext.copy()
503 |
504 | display_scale = min(self.__screen_width / w, self.__screen_height / h)
505 | display_scale = min(1, display_scale)
506 |
507 | # Do the zoom
508 | for i in range(0, zoom_steps):
509 | zoom_factor = f**i
510 |
511 |             # zoom the outer image
512 | mtx_curr = cv2.getRotationMatrix2D((cx, cy), 0, zoom_factor)
513 | img_curr = cv2.warpAffine(imgCurr, mtx_curr, (w, h))
514 |
515 |             # zoom the inner image; its zoom factor is smaller than that of
516 |             # the outer image by the series zoom factor
517 | mtx_next = cv2.getRotationMatrix2D((cx, cy), 0, zoom_factor/self.__param.zoom_factor)
518 | img_next = cv2.warpAffine(imgNext, mtx_next, (w, h))
519 |
520 |             # Zoomed inner image now has the same size as the outer image but is padded with
521 | # black pixels. We need to crop it to the proper size.
522 | ww = round(w * (zoom_factor/self.__param.zoom_factor))
523 | hh = round(h * (zoom_factor/self.__param.zoom_factor))
524 |
525 |             # We can't use the entire image because close to the edges
526 |             # Midjourney takes liberties with the content, so we crop
527 |             # the inner image. (I also tried soft blending but crop
528 |             # looked better)
529 | ww = int(ww * self.__param.zoom_image_crop)
530 | hh = int(hh * self.__param.zoom_image_crop)
531 | img_next = ih.crop_image(img_next, (ww, hh))
532 |
533 | if i == 0:
534 | # The second image may not be perfectly centered. We need to determine
535 |                 # the image offset to compensate for it
536 | detector = TemplateDetector(threshold=0.3, max_num=1, method=cv2.TM_CCOEFF_NORMED)
537 | detector.pattern = img_next
538 | result, result_image = detector.search(img_curr)
539 |
540 | if len(result)==0:
541 | text = f'Error: Cannot match the following two images!'
542 | self.__show_error_images(imgCurr, imgNext, text)
543 | raise Exception("Cannot match image to precursor!")
544 |
545 |                 # this is the "true" position that the inner image must
546 |                 # have to match perfectly onto the outer image. Theoretically
547 |                 # it should always be centered to the outer image but
548 |                 # Midjourney takes some liberties here and there may be
549 |                 # a significant offset (e.g. 20 pixels).
550 | bx, by, bw, bh, score = result[0, :5]
551 |
552 |                 # compute initial misalignment of the second image. The second image
553 |                 # *should* be centered to the outer image but it is often not.
554 |                 # So we need to use this initial offset when we insert the inner image
555 |                 # in order to not have visual jumps, but we have to gradually eliminate the
556 |                 # misalignment as we zoom so that it is zero when switching to
557 |                 # the next image.
558 | ma_x = int(cx - bx)
559 | ma_y = int(cy - by)
560 |
561 | # Plausibility check. If the misalignment is too large something is wrong.
562 | # Usually the images are not in sequence or a zoom step is missing.
563 | if abs(ma_x) > w/5 or abs(ma_y) > h/5:
564 | cv2.imshow("-haystack-", img_curr)
565 | cv2.waitKey(0)
566 |
567 | cv2.imshow("-needle-", img_next)
568 | cv2.waitKey(0)
569 |
570 | text = f'Error: Strong image misalignment found in zoom step {i} between these two images.\n' \
571 |                        f'The offset vector is (dx={ma_x}, dy={ma_y}) which indicates an error in the sequence.\n' \
572 | f'Images may not be in order, or contain multiple images for at least one zoom step.'
573 | self.__show_error_images(imgCurr, imgNext, text)
574 | raise Exception(f'Strong image misalignment found in step {i} (delta_x={ma_x}, delta_y={ma_y})! The images may not be in order, or the zoom factor is incorrect. Try using the "-as" option!')
575 |
576 | # How much do we need to compensate for each step?
577 | ma_comp_x = ma_x / zoom_steps
578 | ma_comp_y = ma_y / zoom_steps
579 |
580 | # Add the smaller image into the larger one but shift it to
581 | # compensate the misalignment. Problem is that when it is maximized
582 | # it would be shifted off center. We need to fix that later.
583 | hs = hh//2 + ma_y
584 | ws = ww//2 + ma_x
585 | img_curr[cy-hs:cy-hs+hh, cx-ws:cx-ws+ww] = img_next
586 |
587 |             # finally we have to gradually shift the resulting image back because the
588 |             # misalignment compensation moved it off center. We shift a little further
589 |             # back with each step so that the image is centered again when we switch
590 |             # to the next image.
591 | ox = ma_comp_x * i
592 | oy = ma_comp_y * i
593 |
594 | print(f' - frame misalignment: zoom_step={i}; x_total={ma_x:.2f}; y_total={ma_y:.2f}; x_step={ma_x-ox:.2f}; y_step={ma_y-oy:.2f}', end='\r')
595 |
596 | if self.__param.debug_mode:
597 |             # Draw center cross for outer image
598 | cv2.line(img_curr, (0, 0), (w, h), (0,0,255), thickness=1)
599 | cv2.line(img_curr, (0, h), (w, 0), (0,0,255), thickness=1)
600 |
601 | # Draw rectangle around actual image
602 | cv2.rectangle(img_curr, (cx-ws, cy-hs), (cx-ws+ww, cy-hs+hh), (0,255,0), 1)
603 |
604 | # Draw Center cross for inner image
605 | cv2.line(img_curr, (cx-ws, cy-hs), (cx-ws+ww, cy-hs+hh), (0,255,0), thickness=1)
606 | cv2.line(img_curr, (cx-ws, cy-hs+hh), (cx-ws+ww, cy-hs), (0,255,0), thickness=1)
607 |
608 |
609 | mtx_shift = np.float32([[1, 0, ox], [0, 1, oy]])
610 | img_curr = cv2.warpAffine(img_curr, mtx_shift, (img_curr.shape[1], img_curr.shape[0]))
611 |
612 | if self.__param.debug_mode:
613 | xp = (w - video_w)//2
614 | yp = (h - video_h)//2
615 |
616 | cv2.putText(img_curr, f'rel_zoom={zoom_factor:.2f}', (xp+5, yp+20), self.__font, self.__fontScale, (0,0,255), self.__fontThickness, self.__fontLineType)
617 | cv2.putText(img_curr, f'size_inner={ww:.0f}x{hh:.0f}', (xp+5, yp+40), self.__font, self.__fontScale, (0,0,255), self.__fontThickness, self.__fontLineType)
618 | cv2.putText(img_curr, f'mis_align={ma_x},{ma_y}', (xp+5, yp+60), self.__font, self.__fontScale, (0,0,255), self.__fontThickness, self.__fontLineType)
619 |                 cv2.putText(img_curr, f'mis_align_res={ma_x-ox:.1f},{ma_y-oy:.1f}', (xp+5, yp+80), self.__font, self.__fontScale, (0,0,255), self.__fontThickness, self.__fontLineType)
620 |
621 | # Draw static image center marker
622 | cv2.line(img_curr, (cx, 0), (cx, h), (255, 0, 0), thickness=1)
623 | cv2.line(img_curr, (0, cy), (w, cy), (255, 0, 0), thickness=1)
624 |
625 |             # final crop, there may be some inconsistencies at the boundaries
626 | img_curr = ih.crop_image(img_curr, (video_w, video_h))
627 |
628 | self.__frames.append(img_curr)
629 |
630 | img_display = cv2.resize(img_curr, None, fx=display_scale, fy=display_scale, interpolation=cv2.INTER_AREA)
631 | cv2.imshow("Frame generation progress...", img_display)
632 | key = cv2.waitKey(10)
633 | if key == 27 or cv2.getWindowProperty("Frame generation progress...", cv2.WND_PROP_VISIBLE) < 1:
634 | raise Exception("User aborted!")
635 |
636 | print()
637 |
--------------------------------------------------------------------------------