#!/usr/bin/python2
# -*- coding: utf-8 -*-
"""Scripts to prepare the Oxford VGG Face dataset.

Things used to download and prepare the dataset for experiments:

* The aria2 download utility (https://aria2.github.io/). All prepared links
  can be downloaded from https://yadi.sk/d/c8gR5nIEqZteV. To download the
  dataset simply type ``aria2c -i vgg-faces-aria.txt --deferred-input -j 60``.
  It took approximately 1.5 days and 100GB of space.
* Python 2.7.11 with the ``pillow`` module for image processing. All
  downloaded images can be cropped with the ``python2 prepare.py`` command.

Check the dataset's page (http://www.robots.ox.ac.uk/~vgg/data/vgg_face/)
for more information.

When run as a script this module:

1. normalises the download tree under ``./out`` (``walk_and_clean``), then
2. crops every image listed in the instruction files under ``./files`` and
   writes the result below ``./cropped`` (``walk_and_crop``).

The code avoids constructs whose behaviour differs between Python 2 and
Python 3 (most notably ``map`` used for its side effects), so it runs under
either interpreter.
"""

from __future__ import print_function, division

import os
import shutil


def _top_level(path):
    """Return ``(subdirs, files)`` found directly inside *path*.

    ``os.walk`` yields nothing for a missing or unreadable directory, which
    would make a bare ``next()`` raise ``StopIteration``; both lists are
    empty in that case instead.
    """
    _, subdirs, files = next(os.walk(path), (path, [], []))
    return subdirs, files


def _image_type(path):
    """Return the image type of the file at *path* (e.g. ``'jpeg'``) or None.

    ``imghdr`` is used when the interpreter still ships it. The module was
    removed from the standard library in Python 3.13, so on newer
    interpreters the lower-cased format reported by Pillow is used instead.
    """
    try:
        import imghdr
    except ImportError:
        imghdr = None
    if imghdr is not None:
        return imghdr.what(path)

    from PIL import Image
    try:
        with Image.open(path) as img:
            return img.format.lower() if img.format else None
    except (IOError, OSError):
        # Pillow could not identify the file: treat it as "not an image".
        return None


def clean_subdir(save_path, cur_path):
    """Replace the directory *cur_path* by the single file buried inside it.

    The first sub-directory is followed recursively; once a level without
    sub-directories is reached, its first file is handed to ``clean_file``
    (which stores it as ``save_path + '.' + <type>`` or deletes it).
    *cur_path* itself is removed afterwards, together with anything left in
    it.
    """
    subdirs, files = _top_level(cur_path)
    if subdirs:
        clean_subdir(save_path, os.path.join(cur_path, subdirs[0]))
    elif files:
        clean_file(save_path, os.path.join(cur_path, files[0]))
    shutil.rmtree(cur_path)


def clean_file(save_path, cur_path):
    """Give the file *cur_path* an extension matching its real content.

    The file is moved to ``save_path + '.' + <detected type>``. Files that
    are not recognised as images are deleted.
    """
    print(cur_path, save_path)
    ext = _image_type(cur_path)
    if ext is None:
        os.remove(cur_path)
        return
    shutil.move(cur_path, save_path + '.' + ext)


def get_clean_name(name):
    """Return *name* up to (not including) its first ``'.'``.

    A name without any dot is returned unchanged.
    """
    return name.partition('.')[0]


def walk_and_clean(root_dir='./'):
    """Normalise every directory found directly below *root_dir*.

    Inside each of those directories:

    * every sub-directory is renamed to its clean name and then collapsed
      into a single image file by ``clean_subdir``;
    * every regular file is passed to ``clean_file`` so that it ends up as
      ``<clean name>.<detected type>`` or is deleted when it is not an image.

    Nothing happens when *root_dir* does not exist.
    """
    names, _ = _top_level(root_dir)
    print(names)
    for name in names:
        cur_path = os.path.join(root_dir, name)
        # Snapshot taken before any change: files produced while collapsing
        # the sub-directories are therefore not processed a second time.
        subdirs, files = _top_level(cur_path)
        for subdir in subdirs:
            clean_name = get_clean_name(subdir)
            target = os.path.join(cur_path, clean_name)
            if clean_name != subdir:
                os.rename(os.path.join(cur_path, subdir), target)
            clean_subdir(target, target)
        for filename in files:
            clean_file(os.path.join(cur_path, get_clean_name(filename)),
                       os.path.join(cur_path, filename))


def map_names(filenames):
    """Map the integer index encoded in each file name to that file name.

    The index is the part of the name before the first dot. Names whose
    prefix is not an integer are skipped.
    """
    idx_map = {}
    for name in filenames:
        try:
            idx = int(get_clean_name(name))
        except ValueError:
            continue
        idx_map[idx] = name
    return idx_map


def crop_from_file(filename, images_dir='./', save_dir='./cropped'):
    """Crop the images of one person as described by an instruction file.

    The person's name is the base name of *filename* without its extension.
    Source images are read from ``images_dir/<person>`` and the crops are
    written, under the same file names, to ``save_dir/<person>`` (created
    when missing).

    Each line of *filename* is split on single spaces and used only when
    that yields exactly 10 fields: field 0 is the image index and fields
    3-6 are passed, truncated to integers, as the box to ``Image.crop``
    (left, upper, right, lower). Lines with non-numeric values and lines
    that refer to an image that is not on disk are skipped.
    """
    # Imported here so that the cleaning step can run without Pillow.
    from PIL import Image

    person = os.path.splitext(os.path.basename(filename))[0]
    print(person)
    out_dir = os.path.join(save_dir, person)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    person_dir = os.path.join(images_dir, person)
    img_map = map_names(_top_level(person_dir)[1])
    with open(filename, 'r') as f:
        for line in f:
            arguments = [field.strip() for field in line.split(' ')]
            if len(arguments) != 10:
                continue
            try:
                img_idx = int(arguments[0])
                bbox = tuple(int(float(value)) for value in arguments[3:7])
            except ValueError:
                continue
            img_name = img_map.get(img_idx)
            if img_name is None:
                continue
            # The context manager closes the source file right away instead
            # of leaving one open handle per image to the garbage collector.
            with Image.open(os.path.join(person_dir, img_name)) as img:
                img.crop(bbox).save(os.path.join(out_dir, img_name))


def walk_and_crop(instr_dir='./', images_dir='./images', save_dir='./cropped'):
    """Run ``crop_from_file`` for every file directly inside *instr_dir*.

    Nothing happens when *instr_dir* does not exist.
    """
    for instr_name in _top_level(instr_dir)[1]:
        crop_from_file(os.path.join(instr_dir, instr_name),
                       images_dir, save_dir)


def main():
    """Clean ``./out`` and crop its images into ``./cropped``."""
    walk_and_clean('./out')
    # A previous run may already have created the output directory.
    if not os.path.isdir('./cropped'):
        os.makedirs('./cropped')
    walk_and_crop('./files', './out', './cropped')


if __name__ == '__main__':
    main()