├── .gitignore ├── img_funcs.py └── imagenet_analysis.py /.gitignore: -------------------------------------------------------------------------------- 1 | tars 2 | unpacked 3 | annotation 4 | output 5 | -------------------------------------------------------------------------------- /img_funcs.py: -------------------------------------------------------------------------------- 1 | import Image 2 | import ImageDraw 3 | import numpy as np 4 | 5 | from IPython.core.debugger import Tracer 6 | tracer = Tracer() 7 | 8 | def draw_bounding_boxes(imgfile, bounding_boxes, outfile): 9 | """ Takes an image path, draws the bounding boxes in "box" (list of 4-tuples) and 10 | writes output to filename.""" 11 | im = Image.open(imgfile) 12 | for xmin, ymin, xmax, ymax in bounding_boxes: 13 | draw = ImageDraw.Draw(im) 14 | draw.rectangle([xmin, ymin, xmax, ymax], outline='red') 15 | im.save(outfile) 16 | 17 | def grab_bounding_boxes(imgfile, bounding_boxes): 18 | im = Image.open(imgfile) 19 | bbs = [] 20 | for box in bounding_boxes: 21 | bbs.append(im.crop(box)) 22 | return bbs 23 | 24 | 25 | 26 | def collection_mean(images): 27 | """ input: list of pil images, output mean of images 28 | converts all images to 512x512 by scaling and padding 29 | """ 30 | 31 | resized_images = [] 32 | for image in images: 33 | # resize so larger side is 512 34 | scale_factor = 512. / max(image.size) 35 | 36 | # we do cases to avoid float problems 37 | if image.size[0] > image.size[1]: 38 | image = image.resize([512, int(image.size[1] * scale_factor)]) 39 | else: 40 | image = image.resize([int(image.size[1] * scale_factor), 512]) 41 | 42 | image = np.array(image)/255. 43 | 44 | # make square by padding with zeros 45 | padding = np.abs(image.shape[0] - image.shape[1]) 46 | up = np.ceil(padding/2.) 47 | down = np.floor(padding/2.) 48 | if image.shape[0] > image.shape[1]: 49 | padded = np.hstack([np.zeros([image.shape[0], up, 3]), image,np.zeros([image.shape[0], down, 3])]) 50 | else: 51 | padded = np.vstack([np.zeros([up, image.shape[1], 3]), image,np.zeros([down, image.shape[1], 3])]) 52 | 53 | resized_images.append(padded) 54 | 55 | mean = np.mean(resized_images, axis=0) 56 | import matplotlib.pyplot as plt 57 | plt.imshow(mean) 58 | plt.show() 59 | tracer() 60 | 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /imagenet_analysis.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | sys.path.insert(0, os.path.join(os.getenv("HOME"),"python_packages/lib/python2.6/site-packages/")) 5 | 6 | from scipy.io import loadmat 7 | import numpy as np 8 | import os 9 | from glob import glob 10 | 11 | from img_funcs import draw_bounding_boxes, grab_bounding_boxes 12 | import xml.etree.ElementTree as ET 13 | 14 | from joblib import Memory 15 | 16 | memory = Memory("cache") 17 | #import elementtree.ElementTree as ET 18 | 19 | def pad_to(X, n): 20 | """Pad a 1-d array of size <= n with zeros to size n.""" 21 | if X.ndim != 1: 22 | raise ValueError("Only 1-d arrays can be padded.") 23 | size = X.size 24 | if size == n: 25 | return X 26 | elif size < n: 27 | return np.hstack([X, np.zeros((n-size))]) 28 | else: 29 | raise ValueError("Size of X must be smaller or equal to n.") 30 | 31 | @memory.cache 32 | def cached_bow(files): 33 | features = [] 34 | file_names = [] 35 | wnids = [] 36 | counts = [] 37 | 38 | for bow_file in files: 39 | print("loading %s"%bow_file) 40 | bow_structs = loadmat(bow_file, struct_as_record=False)['image_sbow'] 41 | file_names.extend([str(x[0]._fieldnames) for x in bow_structs]) 42 | bags_of_words = [pad_to(np.bincount(struct[0].sbow[0][0].word.ravel()), 1000) for struct in bow_structs] 43 | features.extend(bags_of_words) 44 | # if we where interested in the actual words: 45 | #words = [struct[0][1][0][0][0] for struct in bow_structs] 46 | # there is other stuff in the struct but I don't care at the moment: 47 | #x = [struct[0][1][0][0][1] for struct in bow_structs] 48 | #y = [struct[0][1][0][0][2] for struct in bow_structs] 49 | #scale = [struct[0][1][0][0][3] for struct in bow_structs] 50 | #norm = [struct[0][1][0][0][4] for struct in bow_structs] 51 | wnid = os.path.basename(bow_file).split(".")[0] 52 | wnids.append(wnid) 53 | counts.append(len(bags_of_words)) 54 | features = np.array(features) 55 | return features, wnids, counts 56 | 57 | 58 | class ImageNetData(object): 59 | """ ImageNetData needs path to meta.mat, path to images and path to annotations. 60 | The images are assumed to be in folders according to their synsets names 61 | 62 | Synsets are always handled using their index in the 'synsets' dict. This 63 | is their id-1 and is referred to as classidx. 64 | Images are handles using their id, which is the number in the file name. 65 | These are non-concecutive and therefore called id/imgid. 66 | """ 67 | def __init__(self, meta_path, image_path=None, annotation_path=None, bow_path=None): 68 | self.image_path = image_path 69 | self.annotation_path = annotation_path 70 | self.meta_path = meta_path 71 | self.meta_data = loadmat(os.path.join(meta_path, "meta.mat"), struct_as_record=False) 72 | self.bow_path = bow_path 73 | 74 | self.synsets = np.squeeze(self.meta_data['synsets']) 75 | 76 | #['ILSVRC2010_ID', 'WNID', 'words', 'gloss', 'num_children', 'children', 'wordnet_height', 'num_train_images'] 77 | self.ids = np.squeeze(np.array([x.ILSVRC2010_ID for x in self.synsets])) 78 | self.wnids = np.squeeze(np.array([x.WNID for x in self.synsets])) 79 | self.word = np.squeeze(np.array([x.words for x in self.synsets])) 80 | self.num_children = np.squeeze(np.array([x.num_children for x in self.synsets])) 81 | self.children = [np.squeeze(x.children).astype(np.int) for x in self.synsets] 82 | 83 | def img_path_from_id(self, classidx, imgidx): 84 | wnid = self.wnids[classidx] 85 | return os.path.join(self.image_path, wnid, wnid+'_'+imgidx+".JPEG") 86 | 87 | def class_idx_from_string(self, search_string): 88 | """Get class index from string in class name.""" 89 | indices = np.where([search_string in x[2][0] for x in self.synsets])[0] 90 | for i in indices: 91 | print(self.synsets[i]) 92 | return indices 93 | 94 | def get_children(self, aclass): 95 | """Traverse tree to the leafes. Takes classidx, returns 96 | list of all recursive chilren of this class.""" 97 | 98 | # minus one converts ids into indices in our arrays 99 | children = self.synsets[aclass][5][0] - 1 100 | 101 | print(self.synsets[aclass]) 102 | rchildren = children.tolist() 103 | 104 | print "-----------------" 105 | for child in children: 106 | print self.synsets[child] 107 | # recurse 108 | if self.synsets[child][4] != 0: 109 | rchildren.extend(self.get_children(child)) 110 | return rchildren 111 | 112 | def get_bndbox(self, classidx, imageid): 113 | """Get bouning box coordinates for image with id ``imageid`` 114 | in synset given by ``classidx``.""" 115 | 116 | wnid = self.wnids[classidx] 117 | annotation_file = os.path.join(self.annotation_path, str(wnid), str(wnid) + "_" + str(imageid) + ".xml") 118 | xmltree = ET.parse(annotation_file) 119 | objects = xmltree.findall("object") 120 | result = [] 121 | for object_iter in objects: 122 | bndbox = object_iter.find("bndbox") 123 | result.append([int(it.text) for it in bndbox]) 124 | #[xmin, ymin, xmax, ymax] = [it.text for it in bndbox] 125 | return result 126 | 127 | def get_image_ids(self, theclass): 128 | wnid = self.wnids[theclass] 129 | files = glob(os.path.join(self.image_path,wnid,wnid+"*")) 130 | filenames = [os.path.basename(f)[:-5] for f in files] 131 | numbers = map(lambda f: f.split("_")[1], filenames) 132 | return numbers 133 | 134 | def bounding_box_images(self, classidx): 135 | """Get list of cut out bounding boxes 136 | for a given classidx.""" 137 | 138 | if not os.path.exists("output/bounding_box"): 139 | os.mkdir("output/bounding_box") 140 | #class_string = self.word[classidx] 141 | wnid = self.wnids[classidx] 142 | if not os.path.exists(os.path.join("output/bounding_box", wnid)): 143 | os.mkdir(os.path.join("output/bounding_box", wnid)) 144 | 145 | image_ids = self.get_image_ids(classidx) 146 | bbfiles = [] 147 | for imgid in image_ids: 148 | try: 149 | bounding_boxes = self.get_bndbox(classidx, imgid) 150 | except IOError: 151 | #no bounding box 152 | #print("no xml found") 153 | continue 154 | bbfiles.append(imgid) 155 | img_path = self.img_path_from_id(classidx, imgid) 156 | out_path = str(os.path.join("output/bounding_box", wnid, wnid+'_'+imgid+".png")) 157 | draw_bounding_boxes(img_path, bounding_boxes, out_path) 158 | #if len(bbfiles)>2: 159 | #break 160 | print("annotated files: %d"%len(bbfiles)) 161 | 162 | def class_idx_from_wnid(self, wnid): 163 | """Get class index in ``self.synset`` from synset id""" 164 | result = np.where(self.wnids==wnid) 165 | if len(result[0]) == 0: 166 | raise ValueError("Invalid wnid.") 167 | return result[0][0] 168 | 169 | def all_bounding_boxes(self, classidx): 170 | image_ids = self.get_image_ids(classidx) 171 | all_bbs = [] 172 | for imgid in image_ids: 173 | try: 174 | img_bbs = self.get_bndbox(classidx, imgid) 175 | except IOError: 176 | #no bounding box 177 | #print("no xml found") 178 | continue 179 | f = self.img_path_from_id(classidx, imgid) 180 | all_bbs.extend(grab_bounding_boxes(f, img_bbs)) 181 | return all_bbs; 182 | 183 | def load_val_labels(self): 184 | return np.loadtxt(os.path.join(self.meta_path, "ILSVRC2010_validation_ground_truth.txt")) 185 | 186 | def load_bow(self, dataset="train"): 187 | """Get bow representation of dataset ``dataset``. 188 | Legal values are ``train``, ``val`` and ``test``. 189 | 190 | Returns 191 | ------- 192 | features : numpy array, shape [n_samples, n_features], 193 | containing bow representation of all images in given dataset 194 | 195 | labels : numpy array, shape [n_samples], 196 | containing classidx for image labels. (Not available for ``test``) 197 | """ 198 | if not self.bow_path: 199 | raise ValueError("You have to specify the path to" 200 | "the bow features in ``bow_path`` to be able" 201 | "to load them") 202 | 203 | files = glob(os.path.join(self.bow_path, dataset, "*.sbow.mat")) 204 | 205 | if len(files) == 0: 206 | raise ValueError("Could not find any bow files.") 207 | 208 | features, wnids, counts = cached_bow(files) 209 | 210 | if dataset == "train": 211 | labels_nested = [[self.class_idx_from_wnid(wnid)] * count for wnid, count in zip(wnids, counts)] 212 | labels = np.array([x for l in labels_nested for x in l]) 213 | elif dataset == "val": 214 | labels = self.load_val_labels() 215 | elif dataset == "test": 216 | labels = None 217 | else: 218 | raise ValueError("Unknow dataset %s"%dataset) 219 | 220 | return features, labels 221 | 222 | 223 | def main(): 224 | # ImageNetData needs path to meta.mat, path to images and path to annotations. 225 | # The images are assumed to be in folders according to their synsets names 226 | #imnet = ImageNetData("ILSVRC2011_devkit-2.0/data", "unpacked", "annotation") 227 | imnet = ImageNetData("/nfs3group/chlgrp/datasets/ILSVRC2010/devkit-1.0/data", 228 | bow_path="/nfs3group/chlgrp/datasets/ILSVRC2010") 229 | 230 | features, labels = imnet.load_bow() 231 | features_val, labels_val = imnet.load_bow('val') 232 | 233 | from IPython.core.debugger import Tracer 234 | tracer = Tracer(colors="LightBG") 235 | tracer() 236 | 237 | 238 | 239 | if __name__ == "__main__": 240 | main() 241 | --------------------------------------------------------------------------------