├── LICENSE ├── README.md └── datagen.py /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Matthew 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ImageData-Generator 2 | Converts folders of images to chunks which can easily be saved/loaded into RAM (numpy). 3 | 4 | Easily import your files and convert them to NumPy arrays. 5 | Automatically saves the arrays in segments which can easily be loaded into RAM. 6 | 7 | 8 | 9 | 10 | To use: 11 | 12 | ```python 13 | dataGenerator(folder, im_size, mss = (1024 ** 3), flip = True, verbose = True) 14 | ``` 15 | 16 | folder: The directory, must be inside another folder named data. 
"""Convert folders of images into NumPy segment files and sample batches.

Usage::

    d = dataGenerator(folder, im_size, mss=(1024 ** 3), flip=True, verbose=True)
    batch = d.get_batch(num)

folder:  name of the image directory; it must be inside a directory named
         ``data`` (relative to the current working directory).
im_size: the size each image is resized to (ex. 128 = 128x128).
mss:     Maximum Segment Size (in bytes), default 1GB.
flip:    toggle whether sampled images are randomly mirrored.
verbose: toggle whether the data generator prints information or not.
num:     number of images ``get_batch`` returns.

``get_batch`` selects random images from the currently loaded segment and
counts the number of images sampled so that a new segment is loaded once
enough images have been sampled.
"""

import os
import random

import numpy as np

# File suffixes (compared case-insensitively) that are treated as images.
_IMAGE_EXTENSIONS = (".jpg", ".jpeg", ".png")


# Print iterations progress
def printProgressBar(iteration, total, prefix='', suffix='', decimals=1,
                     length=50, fill='█'):
    """
    Call in a loop to create terminal progress bar
    @params:
        iteration   - Required  : current iteration (Int)
        total       - Required  : total iterations (Int)
        prefix      - Optional  : prefix string (Str)
        suffix      - Optional  : suffix string (Str)
        decimals    - Optional  : positive number of decimals in percent complete (Int)
        length      - Optional  : character length of bar (Int)
        fill        - Optional  : bar fill character (Str)
    """
    percent = "{0:.{1}f}".format(100 * (iteration / float(total)), decimals)
    filled_length = int(length * iteration // total)
    bar = fill * filled_length + '-' * (length - filled_length)
    # Both the leading and the trailing '\r' keep the bar on a single line.
    print('\r %s |%s| %s%% %s' % (prefix, bar, percent, suffix), end='\r')
    # Print New Line on Complete
    if iteration == total:
        print()
        print()


class dataGenerator(object):
    """Serve random image batches from ``.npy`` segments cached on disk.

    On construction the images under ``data/<folder>`` are converted into
    uint8 arrays stored in ``data/<folder>-npy-<im_size>/data-<n>.npy``,
    unless that cache directory already holds segments. One randomly chosen
    segment is kept in memory at a time.

    Attributes:
        folder: dataset directory name inside ``data``.
        im_size: edge length images are resized to.
        segment_length: maximum number of images per segment, derived from
            ``mss`` and the size of one RGB uint8 image.
        flip: whether ``get_batch`` randomly mirrors images.
        verbose: whether progress information is printed.
        segments: paths of the known ``.npy`` segment files.
        images: array holding the currently loaded segment.
        update: number of images sampled since the segment was loaded.
    """

    def __init__(self, folder, im_size, mss=(1024 ** 3), flip=True, verbose=True):
        """Prepare the on-disk cache if needed and load a first segment.

        Raises:
            ValueError: if no non-empty segment is available afterwards.
            OSError: if the cache directory cannot be created (for example
                because the ``data`` directory does not exist).
        """
        self.folder = folder
        self.im_size = im_size
        # One image occupies im_size * im_size * 3 bytes (RGB, uint8).
        self.segment_length = mss // (im_size * im_size * 3)
        self.flip = flip
        self.verbose = verbose

        self.segments = []
        self.images = []
        self.update = 0

        if self.verbose:
            print("Importing images...")
            print("Maximum Segment Size: ", self.segment_length)

        try:
            os.mkdir(self._cache_dir(self.folder))
        except FileExistsError:
            # Only "already there" means a cache may exist; any other failure
            # (missing parent directory, permissions) must not be hidden.
            pass

        # Convert when the cache is new, or when it exists but is empty.
        if not self._find_segments(self.folder):
            self.folder_to_npy(self.folder)

        self.load_from_npy(self.folder)

    def _cache_dir(self, folder):
        """Return the directory that stores the segments of ``folder``."""
        return os.path.join("data", folder + "-npy-" + str(self.im_size))

    def _segment_path(self, folder, index):
        """Return the file path of segment number ``index``."""
        return os.path.join(self._cache_dir(folder), "data-" + str(index) + ".npy")

    def _find_segments(self, folder):
        """Return the sorted paths of all ``.npy`` files in the cache."""
        found = []
        for dirpath, _dirnames, filenames in os.walk(self._cache_dir(folder)):
            for filename in filenames:
                if filename.endswith(".npy"):
                    found.append(os.path.join(dirpath, filename))
        return sorted(found)

    def folder_to_npy(self, folder):
        """Resize every image under ``data/<folder>`` and save it in segments.

        Images are visited in shuffled order, converted to RGB, resized to
        ``im_size`` x ``im_size`` and written in groups of at most
        ``segment_length`` images. No file is written for an empty group.
        """
        # Imported here because only the conversion step needs Pillow;
        # an existing cache can then be loaded without it.
        from PIL import Image

        if self.verbose:
            print("Converting from images to numpy files...")

        names = []
        for dirpath, _dirnames, filenames in os.walk(os.path.join("data", folder)):
            for filename in filenames:
                if filename.lower().endswith(_IMAGE_EXTENSIONS):
                    names.append(os.path.join(dirpath, filename))

        np.random.shuffle(names)

        if self.verbose:
            print(str(len(names)) + " images.")

        sn = 0          # index of the segment currently being filled
        segment = []    # images collected for that segment

        for fname in names:
            if self.verbose:
                print('\r' + str(sn) + " // " + str(len(segment)) + "\t", end='\r')

            # The context manager closes the file handle once it is decoded.
            with Image.open(fname) as img:
                resized = img.convert('RGB').resize(
                    (self.im_size, self.im_size), Image.BILINEAR)
            segment.append(np.array(resized, dtype='uint8'))

            if len(segment) >= self.segment_length:
                np.save(self._segment_path(folder, sn), np.array(segment))
                segment = []
                sn = sn + 1

        # Save the remainder, but never an empty segment: an empty array
        # cannot be sampled from later.
        if segment:
            np.save(self._segment_path(folder, sn), np.array(segment))

    def load_from_npy(self, folder):
        """Rescan the cache of ``folder`` and load one random segment.

        The segment list is rebuilt on each call, so repeated calls do not
        accumulate duplicate entries.
        """
        self.segments = self._find_segments(folder)
        self.load_segment()

    def load_segment(self):
        """Load a randomly chosen non-empty segment and reset ``update``.

        Raises:
            ValueError: if there are no segments, or all of them are empty.
        """
        if self.verbose:
            print("Loading segment")

        if not self.segments:
            raise ValueError(
                "No .npy segments found in " + self._cache_dir(self.folder))

        # Trying the segments in random order picks uniformly among the
        # non-empty ones; empty files (e.g. a stale trailing segment in an
        # old cache) are skipped.
        candidates = list(self.segments)
        random.shuffle(candidates)
        for path in candidates:
            images = np.load(path)
            if images.shape[0] > 0:
                self.images = images
                self.update = 0
                return

        raise ValueError(
            "All segments in " + self._cache_dir(self.folder) + " are empty")

    def get_batch(self, num):
        """Return ``num`` images sampled with replacement from the segment.

        A different segment is loaded first when more images have been
        sampled than the current segment contains. When ``flip`` is set each
        image is mirrored along axis 1 with probability 0.5.

        Returns:
            float32 array of the sampled images, scaled by 1 / 255.
        """
        if self.update > self.images.shape[0]:
            self.load_from_npy(self.folder)

        self.update = self.update + num

        # The upper bound of randint is exclusive, so pass the full length;
        # otherwise the last image would never be drawn.
        idx = np.random.randint(0, self.images.shape[0], num)
        out = []

        for i in idx:
            img = self.images[i]
            if self.flip and random.random() < 0.5:
                img = np.flip(img, 1)
            out.append(img)

        return np.array(out).astype('float32') / 255.0