├── __init__.py ├── img ├── test1.jpg ├── test2.jpg ├── test3.jpg ├── test4.jpg ├── test5.jpg ├── test6.jpg ├── straight_lines1.jpg └── straight_lines2.jpg ├── LICENSE ├── tensor_manipulation.py ├── .gitignore ├── filesystem.py ├── list_helper.py ├── README.md ├── stitching.py ├── io_helper.py └── bbox_helper.py /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /img/test1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndrplz/computer_vision_utils/HEAD/img/test1.jpg -------------------------------------------------------------------------------- /img/test2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndrplz/computer_vision_utils/HEAD/img/test2.jpg -------------------------------------------------------------------------------- /img/test3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndrplz/computer_vision_utils/HEAD/img/test3.jpg -------------------------------------------------------------------------------- /img/test4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndrplz/computer_vision_utils/HEAD/img/test4.jpg -------------------------------------------------------------------------------- /img/test5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndrplz/computer_vision_utils/HEAD/img/test5.jpg -------------------------------------------------------------------------------- /img/test6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndrplz/computer_vision_utils/HEAD/img/test6.jpg 
-------------------------------------------------------------------------------- /img/straight_lines1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndrplz/computer_vision_utils/HEAD/img/straight_lines1.jpg -------------------------------------------------------------------------------- /img/straight_lines2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ndrplz/computer_vision_utils/HEAD/img/straight_lines2.jpg -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Andrea Palazzi 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
# 22 |   (last, empty row of /LICENSE)
# --------------------------------------------------------------------------------
# /tensor_manipulation.py:
# --------------------------------------------------------------------------------
import numpy as np


def resize_tensor(tensor, new_shape):
    """
    Resize a numeric 3D tensor with OpenCV. Each channel is resized independently from the others.

    Parameters
    ----------
    tensor: ndarray
        Numeric 3D tensor of shape (channels, h, w)
    new_shape: tuple or list
        Target spatial size (new_h, new_w)

    Returns
    -------
    new_tensor: ndarray
        Resized tensor having size (channels, new_h, new_w). The output buffer is
        allocated with the `np.zeros` default dtype, so it is float64 whatever the
        dtype of `tensor`.

    Raises
    ------
    ValueError
        If `tensor` is not 3D or `new_shape` does not hold exactly two values.
    """
    # Normalise to a tuple first: the previous `(channels,) + new_shape` only
    # worked when the caller passed an actual tuple.
    new_shape = tuple(new_shape)
    if len(new_shape) != 2:
        raise ValueError('"new_shape" must be (new_h, new_w). Provided: {}'.format(new_shape))
    if tensor.ndim != 3:
        raise ValueError('"tensor" must be 3D (channels, h, w). Provided shape: {}'.format(tensor.shape))

    # Imported here rather than at module level so that `crop_tensor`, which
    # only needs NumPy, stays usable when OpenCV is not installed.
    import cv2

    new_h, new_w = new_shape
    channels = tensor.shape[0]
    new_tensor = np.zeros(shape=(channels, new_h, new_w))
    for i in range(channels):
        # `dsize` is given in reverse order with respect to (new_h, new_w)
        new_tensor[i] = cv2.resize(tensor[i], dsize=(new_w, new_h))

    return new_tensor


def crop_tensor(tensor, indexes):
    """
    Crop a numeric 3D input tensor.

    Parameters
    ----------
    tensor: ndarray
        Numeric 3D tensor of shape (channels, h, w)
    indexes: tuple
        Crop indexes following convention (h1, h2, w1, w2)

    Returns
    -------
    new_tensor: ndarray
        Cropped tensor having size (channels, h2-h1, w2-w1). It is a copy:
        writing into it does not modify `tensor`.
    """
    h1, h2, w1, w2 = indexes

    # all channels are kept; only the two spatial axes are sliced
    return tensor[:, h1:h2, w1:w2].copy()


# --------------------------------------------------------------------------------
# /.gitignore:
# --------------------------------------------------------------------------------
# (non-Python dump content sharing this physical line, kept as a comment)
# 1 | # Pycharm 2 | .idea/ 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | env/ 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | 30 | # PyInstaller 31 |
# (tail of /.gitignore - non-Python dump content sharing this physical line, kept as a comment)
# # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *,cover 49 | .hypothesis/ 50 | 51 | # Translations 52 | *.mo 53 | *.pot 54 | 55 | # Django stuff: 56 | *.log 57 | local_settings.py 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # IPython Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # dotenv 82 | .env 83 | 84 | # virtualenv 85 | venv/ 86 | ENV/ 87 | 88 | # Spyder project settings 89 | .spyderproject 90 | 91 | # Rope project settings 92 | .ropeproject 93 | img/test1_copy.jpg
# --------------------------------------------------------------------------------
# /filesystem.py:
# --------------------------------------------------------------------------------
import os
import sys
import uuid
from os.path import exists
from os.path import join
from os.path import dirname
from os.path import splitext


def get_file_list_recursively(top_directory, allowed_extensions=None):
    """
    Get list of full paths of all files found under root directory "top_directory".
    If a list of allowed file extensions is provided, files are filtered according to this list.

    A progress counter is written to stdout while the hierarchy is walked.

    Parameters
    ----------
    top_directory: str
        Root of the hierarchy
    allowed_extensions: list, optional
        Extensions to keep, written with the leading dot (e.g. ['.jpg']) and
        compared case-sensitively. None or an empty list disables the filter.

    Returns
    -------
    file_list: list
        List of files found under top_directory (with full path).
        Files that have no extension at all are never included.

    Raises
    ------
    ValueError
        If "top_directory" does not exist.
    """
    if not exists(top_directory):
        raise ValueError('Directory "{}" does NOT exist.'.format(top_directory))

    file_list = []

    for cur_dir, _, cur_files in os.walk(top_directory):
        for file_name in cur_files:
            f_ext = splitext(file_name)[1]

            # todo decide what to do with files without extension (skipped for now)
            if not f_ext:
                continue

            # an empty / missing filter accepts every extension
            if allowed_extensions and f_ext not in allowed_extensions:
                continue

            file_list.append(join(cur_dir, file_name))
            # '\r' rewinds the cursor so the counter is updated in place
            sys.stdout.write('\r[{}] - found {:06d} files...'.format(top_directory, len(file_list)))
            sys.stdout.flush()

    sys.stdout.write(' Done.\n')

    return file_list


def give_unique_id_to_all_files_in_hierarchy(top_directory):
    """
    Rename with a unique identifier all the files in a directory hierarchy.

    Each file keeps its directory and its extension; only the base name is
    replaced by a random UUID4. Files without extension are left untouched,
    because `get_file_list_recursively` does not list them.

    Parameters
    ----------
    top_directory: str
        Root of the hierarchy

    Returns
    -------
    None
    """
    for file_path in get_file_list_recursively(top_directory):
        # keep the containing directory and the extension, swap the base name
        file_ext = splitext(file_path)[1]
        file_new_path = join(dirname(file_path), str(uuid.uuid4()) + file_ext)

        os.rename(file_path, file_new_path)


# --------------------------------------------------------------------------------
# /list_helper.py:
# --------------------------------------------------------------------------------
# NOTE: the original file opened with `from __future__ import print_function`.
# Nothing in it prints, and a __future__ import is only legal as the very first
# statement of a module, so it is recorded here as a comment only.
import pickle
import os.path as path


def _check_list_extension(f_ext):
    """Raise ValueError unless "f_ext" is one of the supported dump formats."""
    if f_ext not in ('.txt', '.pickle'):
        # literal braces must be doubled inside a str.format template: the
        # single-brace version raised KeyError instead of this ValueError
        raise ValueError('File extension not supported. Allowed: {{".txt", ".pickle"}}. Provided: "{}"'.format(f_ext))


def dump_list(input_list, file_path):
    """
    Dump list to file, either in "txt" or binary ("pickle") mode.
    Dump mode is chosen according to "file_path" extension.

    In "txt" mode every element is formatted with `str.format` and written on
    its own line, so non-string elements are stored as their string form.

    Parameters
    ----------
    input_list: list
        List object to dump
    file_path: str
        Path of the dump file

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the extension of "file_path" is neither ".txt" nor ".pickle".
    """
    f_ext = path.splitext(file_path)[1]
    _check_list_extension(f_ext)

    if f_ext == '.txt':
        # text mode: writing str to a file opened as 'wb' fails on Python 3
        with open(file_path, 'w') as f:
            f.writelines('{}\n'.format(item) for item in input_list)
    else:
        with open(file_path, 'wb') as f:
            pickle.dump(input_list, f)


def load_list(file_path):
    """
    Load list from file, either in "txt" or binary ("pickle") mode.
    Load mode is chosen according to "file_path" extension.

    Parameters
    ----------
    file_path: str
        Path of the dump file

    Returns
    -------
    file_list: list
        List loaded from file. In "txt" mode the elements are the lines of the
        file as strings, stripped of leading and trailing whitespace.

    Raises
    ------
    IOError
        If "file_path" does not exist.
    ValueError
        If the extension of "file_path" is neither ".txt" nor ".pickle".
    """
    if not path.exists(file_path):
        raise IOError('File "{}" does not exist.'.format(file_path))

    f_ext = path.splitext(file_path)[1]
    _check_list_extension(f_ext)

    if f_ext == '.txt':
        with open(file_path, 'rt') as f:
            return [line.strip() for line in f]  # also removes trailing newline

    # pickle data is binary: it cannot be read from a file opened in 'rt'.
    # SECURITY: unpickling can execute arbitrary code - only load trusted files.
    with open(file_path, 'rb') as f:
        return pickle.load(f)


def split_into_chunks(list_in, max_elements, shuffle=False):
    """
    Split a list into a variable number of chunks of at most "max_elements" each.

    Parameters
    ----------
    list_in: list
        Input list to split into chunks
    max_elements: int
        Max elements allowed into each chunk (must be >= 1)
    shuffle: bool
        If True, a shuffled copy of the input list is chunked (requires scikit-learn)

    Returns
    -------
    list_out: list
        List of list in which each element is a chunk of list_in.
        Only the last chunk may hold fewer than "max_elements" items.

    Raises
    ------
    ValueError
        If "list_in" is not a list or "max_elements" is smaller than 1.
    """
    if not isinstance(list_in, list):
        raise ValueError('Input must be a list.')

    # 0 used to blow up inside range() and negative values silently returned []
    if max_elements < 1:
        raise ValueError('"max_elements" must be a positive integer.')

    if shuffle:
        # imported lazily: scikit-learn is needed for this branch only
        import sklearn.utils
        list_in = sklearn.utils.shuffle(list_in)

    return [list_in[offset:offset + max_elements] for offset in range(0, len(list_in), max_elements)]


# --------------------------------------------------------------------------------
# /README.md:
# --------------------------------------------------------------------------------
# (head of README.md - non-Python dump content sharing this physical line, kept as a comment)
# 1 | # computer_vision_utils 2 | 3 | Everything that I code more than twice during my PhD will end up here. 4 | 5 | bounding boxes 6 | --- 7 | 8 | - [bbox_helper.py](bbox_helper.py) 9 | 10 | Defined class `Rectangle` which should help in all situations that involve handling of bounding boxes.
11 | 12 | ``` 13 | class Rectangle(x_min, y_min, x_max, y_max, label="") 14 | Methods: 15 | intersect_with(self, rect) 16 | resize_sides(self, ratio, bounds=None) 17 | draw(self, frame, color=255, thickness=2, draw_label=False) 18 | get_binary_mask(self, mask_shape) 19 | Properties: 20 | tl_corner(self) 21 | br_corner(self) 22 | coords(self) 23 | area(self) 24 | ``` 25 | 26 | 27 | stitching 28 | --- 29 | - [stitching.py](stitching.py) 30 | 31 | `stitch_together(input_images, layout, resize_dim=None, off_x=None, off_y=None, bg_color=(0, 0, 0)):` 32 | 33 | Stitch together N input images into a bigger frame, using a grid layout. 34 | Input images can be either color or grayscale, but must all have the same size. 35 | Background color is black by default, but it can be changed with `bg_color` parameter. 36 | 37 | containers 38 | --- 39 | 40 | - [list_helper.py](list_helper.py) 41 | 42 | `dump_list(input_list, file_path)` 43 | 44 | Dump list to file, either in "txt" or binary ("pickle") mode. Dump mode is chosen according to "file_path" extension. 45 | 46 | `load_list(file_path)` 47 | 48 | Load list from file, either in "txt" or binary ("pickle") mode. Load mode is chosen according to "file_path" extension. 49 | 50 | `split_into_chunks(list_in, max_elements, shuffle=False)` 51 | 52 | Split a list into a variable number of chunks of at most "max_elements" each. 53 | 54 | filesystem 55 | --- 56 | 57 | - [filesystem.py](filesystem.py) 58 | 59 | `get_file_list_recursively(top_directory, allowed_extensions=[])` 60 | 61 | Get list of full paths of all files found under root directory "top_directory". If a list of allowed file extensions is provided, files are filtered according to this list. 
# (tail of /README.md - non-Python dump content sharing this physical line, kept as a comment;
#  the typos "numeri" and "indipendently" are corrected)
# 62 | 63 | io 64 | --- 65 | 66 | - [io_helper.py](io_helper.py) 67 | 68 | `read_image(path, channels_first, color=True, dtype=np.float32, resize_dim=None)` 69 | 70 | Reads an image from "path" and returns respecting the self explanatory parameters 71 | 72 | `write_image(img_path, img, channels_first=False, color_mode='RGB', resize_dim=None)` 73 | 74 | Writes an image into "img_path" file. If color, you must specify whether the color 75 | dimension is the first one or the last one with "channels_first", and the "color_mode" 76 | as well. Optionally one can resize the image. 77 | 78 | `normalize(img)` 79 | 80 | Normalizes an image between 0 and 255 and returns it as uint8. 81 | 82 | 83 | tensor_manipulation 84 | ------------------- 85 | 86 | - [tensor_manipulation.py](tensor_manipulation.py) 87 | 88 | `resize_tensor(tensor, new_size)` 89 | 90 | Resizes a numeric tensor having shape (channels, h, w) into the new size (channels, new_h, new_w). 91 | Each channel is resized independently (good for feature maps). 92 | 93 | `crop_tensor(tensor, indexes)` 94 | 95 | Crops a numeric tensor having shape (channels, h, w) according to indexes in the form (h1,h2,w1,w2). 96 | 97 | 98 | 99 | 100 | 101 |
# --------------------------------------------------------------------------------
# /stitching.py:
# --------------------------------------------------------------------------------
# OpenCV is imported inside the functions that need it, so the NumPy-only code
# paths of this file remain usable when OpenCV is not installed.
import glob
import numpy as np


def stitch_together(input_images, layout, resize_dim=None, off_x=None, off_y=None, bg_color=(0, 0, 0)):
    """
    Stitch together N input images into a bigger frame, using a grid layout.
    Input images can be either color or grayscale, but must all have the same size.

    The grid is filled row by row; images beyond rows * cols are ignored and
    cells without an image keep the background. The stitch is always uint8.

    Parameters
    ----------
    input_images : list
        List of input images (at least one)
    layout : tuple
        Grid layout of the stitch expressed as (rows, cols)
    resize_dim : couple
        If not None, stitch is resized to this size. It is handed to OpenCV in
        reverse order, i.e. it is read as (new_h, new_w).
    off_x : int
        Offset between stitched images along x axis (default: 10% of image width)
    off_y : int
        Offset between stitched images along y axis (default: 10% of image height)
    bg_color : tuple
        Color used for background, one value per channel.
        NOTE(review): only used for color input; grayscale stitches always get
        a zero background, as in the original code - confirm this is intended.

    Returns
    -------
    stitch : ndarray
        Stitch of input images

    Raises
    ------
    ValueError
        If no image is given, if images differ in shape or dtype, or if they
        are neither 2D nor 3D.
    """
    # explicit check: indexing input_images[0] on an empty list raised IndexError
    if len(input_images) == 0:
        raise ValueError('At least one input image is required')

    if len({img.shape for img in input_images}) > 1:
        raise ValueError('All images must have the same shape')

    if len({img.dtype for img in input_images}) > 1:
        raise ValueError('All images must have the same data type')

    # determine if input images are color (3 dims) or grayscale (2 dims)
    first_shape = input_images[0].shape
    if len(first_shape) == 2:
        is_color = False
    elif len(first_shape) == 3:
        is_color = True
    else:
        raise ValueError('Unknown shape for input images')
    img_h, img_w = first_shape[:2]

    # if no offset is provided, set to 10% of image size
    if off_x is None:
        off_x = img_w // 10
    if off_y is None:
        off_y = img_h // 10

    # allocate the stitch: every image is surrounded by one offset on each side
    rows, cols = layout
    stitch_h = rows * img_h + (rows + 1) * off_y
    stitch_w = cols * img_w + (cols + 1) * off_x
    if is_color:
        background = np.asarray(bg_color).astype(np.uint8)
        stitch = np.empty(shape=(stitch_h, stitch_w, background.shape[0]), dtype=np.uint8)
        stitch[...] = background  # broadcast the color over the whole frame
    else:
        stitch = np.zeros(shape=(stitch_h, stitch_w), dtype=np.uint8)

    # paste images row by row; slicing the first two axes covers both the
    # color and the grayscale case
    for list_idx, image in enumerate(input_images[:rows * cols]):
        r, c = divmod(list_idx, cols)
        top = r * (off_y + img_h) + off_y
        left = c * (off_x + img_w) + off_x
        stitch[top:top + img_h, left:left + img_w] = image

    if resize_dim:
        import cv2
        stitch = cv2.resize(stitch, dsize=tuple(resize_dim[::-1]))

    return stitch


if __name__ == '__main__':

    import cv2

    img_list = glob.glob('img/*.jpg')

    images = [cv2.imread(f, cv2.IMREAD_COLOR) for f in img_list]

    s = stitch_together(images, layout=(5, 5), resize_dim=(1000, 1000), bg_color=(255, 255, 255))

    cv2.imshow('s', s)
    cv2.waitKey()


# --------------------------------------------------------------------------------
# /io_helper.py:
# --------------------------------------------------------------------------------
import numpy as np
import os.path as path


def read_image(img_path, channels_first, color=True, color_mode='BGR', dtype=np.float32, resize_dim=None):
    """
    Reads and returns an image as a numpy array

    Parameters
    ----------
    img_path : string
        Path of the input image
    channels_first: bool
        If True, channel dimension is moved in first position (color images only)
    color: bool, optional
        If True, image is loaded in color: grayscale otherwise
    color_mode: "RGB", "BGR", optional
        Whether to load the color image in RGB or BGR format.
        Any value other than "RGB" leaves the image as loaded by OpenCV.
    dtype: dtype, optional
        Array is casted to this data type before being returned
    resize_dim: tuple, optional
        Resize size following convention (new_h, new_w) - interpolation is linear

    Returns
    -------
    image : np.array
        Loaded Image as numpy array of type dtype

    Raises
    ------
    ValueError
        If "img_path" does not exist or cannot be decoded as an image.
    """
    if not path.exists(img_path):
        raise ValueError('Provided path "{}" does NOT exist.'.format(img_path))

    import cv2

    image = cv2.imread(img_path, cv2.IMREAD_COLOR if color else cv2.IMREAD_GRAYSCALE)

    # imread signals failure by returning None: report it here instead of
    # failing later with an unrelated error
    if image is None:
        raise ValueError('File "{}" could not be read as an image.'.format(img_path))

    if color and color_mode == 'RGB':
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    if resize_dim is not None:
        image = cv2.resize(image, dsize=resize_dim[::-1], interpolation=cv2.INTER_LINEAR)

    if color and channels_first:
        image = np.transpose(image, (2, 0, 1))  # (h, w, c) -> (c, h, w)

    return image.astype(dtype)


def write_image(img_path, img, channels_first=False, color_mode='BGR', resize_dim=None, to_normalize=False):
    """
    Writes an image (numpy array) on file

    Parameters
    ----------
    img_path : string
        Path where to save image
    img : ndarray
        Image that has to be saved. A 3D array is treated as color.
    channels_first: bool
        Set this True if shape is (c, h, w)
    color_mode: "RGB", "BGR", optional
        Whether the image is in RGB or BGR format
    resize_dim: tuple, optional
        Resize size following convention (new_h, new_w)
    to_normalize: bool
        Whether or not to normalize the image between 0 and 255.

    Returns
    ----------
    None

    Raises
    ------
    IOError
        If OpenCV reports that the image could not be written.
    """
    import cv2

    color = img.ndim == 3
    if color and channels_first:
        img = img.transpose(1, 2, 0)  # (c, h, w) -> (h, w, c)

    if color and color_mode == 'RGB':
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)

    if resize_dim is not None:
        img = cv2.resize(img, resize_dim[::-1])

    if to_normalize:
        # normalize() returns a new array: its result was previously thrown
        # away, which made "to_normalize" a no-op
        img = normalize(img)

    # imwrite returns False when nothing was written
    if not cv2.imwrite(img_path, img):
        raise IOError('Image could not be written to "{}".'.format(img_path))


def normalize(img):
    """
    Normalizes an image between 0 and 255 and returns it as uint8.

    Parameters
    ----------
    img : ndarray
        Image that has to be normalized

    Returns
    ----------
    img : ndarray
        The normalized image: a new uint8 array, the input is not modified.
        A constant image is mapped to all zeros; an empty image stays empty.
    """
    img = img.astype(np.float32)  # astype copies, so the caller's array is untouched

    # min()/max() are undefined on empty arrays
    if img.size == 0:
        return img.astype(np.uint8)

    img -= img.min()
    max_value = img.max()
    # a constant image has zero range: skip the division instead of producing NaN
    if max_value > 0:
        img /= max_value
    img *= 255

    return img.astype(np.uint8)


if __name__ == '__main__':

    import cv2

    img = read_image('img/test1.jpg', False, color=False, color_mode='BGR', dtype=np.uint8)
    cv2.imshow('test image', img)
    cv2.waitKey()

    write_image('img/test1_copy.jpg', img, channels_first=False, color_mode='BGR')


# --------------------------------------------------------------------------------
# /bbox_helper.py:
# --------------------------------------------------------------------------------
import numpy as np


class Rectangle:
    """
    2D Rectangle defined by top-left and bottom-right corners.

    Parameters
    ----------
    x_min : int
        x coordinate of top-left corner.
    y_min : int
        y coordinate of top-left corner.
    x_max : int
        x coordinate of bottom-right corner.
    y_max : int
        y coordinate of bottom-right corner.
    label : str, optional
        Text attached to the rectangle; `draw` can render it (default = "").
    """

    def __init__(self, x_min, y_min, x_max, y_max, label=""):

        self.x_min = x_min
        self.y_min = y_min
        self.x_max = x_max
        self.y_max = y_max

        # side lengths are computed once here: they are NOT refreshed if the
        # corner attributes are reassigned afterwards
        self.x_side = self.x_max - self.x_min
        self.y_side = self.y_max - self.y_min

        self.label = label

    def __repr__(self):
        return 'Rectangle(x_min={}, y_min={}, x_max={}, y_max={}, label={!r})'.format(
            self.x_min, self.y_min, self.x_max, self.y_max, self.label)

    def intersect_with(self, rect):
        """
        Compute the intersection between this instance and another Rectangle.

        Parameters
        ----------
        rect : Rectangle
            The instance of the second Rectangle.

        Returns
        -------
        intersection_area : float
            Area of intersection between the two rectangles expressed in number of pixels
            (0. when the rectangles do not overlap).

        Raises
        ------
        ValueError
            If "rect" is not a Rectangle.
        """
        if not isinstance(rect, Rectangle):
            raise ValueError('Cannot compute intersection if "rect" is not a Rectangle')

        # extent of the overlap along each axis; negative means no overlap
        dx = min(self.x_max, rect.x_max) - max(self.x_min, rect.x_min)
        dy = min(self.y_max, rect.y_max) - max(self.y_min, rect.y_min)

        if dx < 0 or dy < 0:
            return 0.

        return dx * dy

    def resize_sides(self, ratio, bounds=None):
        """
        Resize the sides of rectangle while maintaining the aspect ratio and center position.

        Parameters
        ----------
        ratio : float
            Ratio of the resize in range (0, infinity), where 2 means double the size and 0.5 is half of the size.
        bounds: tuple, optional
            If present, clip the Rectangle to these bounds=(xbmin, ybmin, xbmax, ybmax).

        Returns
        -------
        rectangle : Rectangle
            Reshaped Rectangle, carrying the same label as this one.
        """

        # per-side offset magnitude: half of the change in side length
        off_x = abs(ratio * self.x_side - self.x_side) / 2
        off_y = abs(ratio * self.y_side - self.y_side) / 2

        # offset changes sign according if the resize is either positive or negative
        sign = np.sign(ratio - 1.)
        off_x = np.int32(off_x * sign)  # the cast drops the fractional part
        off_y = np.int32(off_y * sign)

        # update top-left and bottom-right coords
        new_x_min, new_y_min = self.x_min - off_x, self.y_min - off_y
        new_x_max, new_y_max = self.x_max + off_x, self.y_max + off_y

        # eventually clip the coordinates according to the given bounds
        if bounds is not None:
            b_x_min, b_y_min, b_x_max, b_y_max = bounds
            new_x_min = max(new_x_min, b_x_min)
            new_y_min = max(new_y_min, b_y_min)
            new_x_max = min(new_x_max, b_x_max)
            new_y_max = min(new_y_max, b_y_max)

        # the label used to be silently lost on resize
        return Rectangle(new_x_min, new_y_min, new_x_max, new_y_max, label=self.label)

    def draw(self, frame, color=255, thickness=2, draw_label=False):
        """
        Draw Rectangle on a given frame.

        Notice: while this function does not return anything, original image `frame` is modified.

        Parameters
        ----------
        frame : 2D / 3D np.array
            The image on which the rectangle is drawn.
        color : tuple, optional
            Color used to draw the rectangle (default = 255)
        thickness : int, optional
            Line thickness used to draw the rectangle (default = 2)
        draw_label : bool, optional
            If True and the Rectangle has a label, draws it on the top of the rectangle.

        Returns
        -------
        None
        """
        import cv2

        # coordinates may be NumPy scalars (e.g. after `resize_sides`): cast
        # them to plain ints, which the OpenCV drawing calls accept as points
        x_min, y_min = int(self.x_min), int(self.y_min)
        x_max, y_max = int(self.x_max), int(self.y_max)

        if draw_label and self.label:

            # compute text size
            text_font, text_scale, text_thick = cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1
            (text_w, text_h), baseline = cv2.getTextSize(self.label, text_font, text_scale, text_thick)

            # text box sits right above the rectangle and is never wider than it
            text_rect_w = min(text_w, int(self.x_side) - 2 * baseline)
            box_tl = (x_min, y_min - text_h - 2 * baseline)
            box_br = (x_min + text_rect_w + 2 * baseline, y_min)

            # draw the filled box on a copy, then blend it into the frame
            out = cv2.rectangle(frame.copy(), pt1=box_tl, pt2=box_br, color=color, thickness=cv2.FILLED)
            cv2.addWeighted(frame, 0.75, out, 0.25, 0, dst=frame)

            # actually write text label
            cv2.putText(frame, self.label, (x_min + baseline, y_min - baseline),
                        text_font, text_scale, (0, 0, 0), text_thick, cv2.LINE_AA)

            # add text rectangle border
            cv2.rectangle(frame, pt1=box_tl, pt2=box_br, color=color, thickness=thickness)

        # draw the Rectangle
        cv2.rectangle(frame, (x_min, y_min), (x_max, y_max), color, thickness)

    def get_binary_mask(self, mask_shape):
        """
        Get uint8 binary mask of shape `mask_shape` with rectangle in foreground.

        Parameters
        ----------
        mask_shape : (tuple)
            Shape of the mask to return - following convention (h, w)

        Returns
        -------
        mask : np.array
            Binary uint8 mask of shape `mask_shape` with rectangle drawn as foreground (value 255).

        Raises
        ------
        ValueError
            If the rectangle does not fit inside `mask_shape`.
        """
        if mask_shape[0] < self.y_max or mask_shape[1] < self.x_max:
            raise ValueError('Mask shape is smaller than Rectangle size')

        import cv2

        # corners are handed to OpenCV as plain Python ints
        tl = tuple(int(v) for v in self.tl_corner)
        br = tuple(int(v) for v in self.br_corner)

        mask = np.zeros(shape=mask_shape, dtype=np.uint8)
        mask = cv2.rectangle(mask, tl, br, color=255, thickness=cv2.FILLED)
        return mask

    @property
    def tl_corner(self):
        """
        Coordinates of the top-left corner of rectangle (as int32).

        Returns
        -------
        tl_corner : int32 tuple
        """
        return tuple(map(np.int32, (self.x_min, self.y_min)))

    @property
    def br_corner(self):
        """
        Coordinates of the bottom-right corner of rectangle (as int32).

        Returns
        -------
        br_corner : int32 tuple
        """
        return tuple(map(np.int32, (self.x_max, self.y_max)))

    @property
    def coords(self):
        """
        Coordinates (x_min, y_min, x_max, y_max) which define the Rectangle.

        Returns
        -------
        coordinates : int32 tuple
        """
        return tuple(map(np.int32, (self.x_min, self.y_min, self.x_max, self.y_max)))

    @property
    def area(self):
        """
        Get the area of Rectangle, computed from the side lengths stored at construction.

        Returns
        -------
        area : float32
        """
        return np.float32(self.x_side * self.y_side)


if __name__ == '__main__':

    import cv2

    img = cv2.imread('img/test1.jpg')
    r = Rectangle(100, 100, 300, 300)
    r.label = "ciao" * 3
    r.draw(img, draw_label=True)

    cv2.imshow('', img)
    cv2.waitKey()
# --------------------------------------------------------------------------------