├── common
    ├── __init__.py
    ├── find_mxnet.py
    ├── util.py
    ├── modelzoo.py
    ├── data.py
    └── fit.py
├── model
    └── PUT_YOUR_MODEL_HERE
├── data
    ├── gen_rec.sh
    └── mx_list.py
├── run.sh
├── .gitignore
├── fine-tune.py
├── README.md
└── sub.py


/common/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/model/PUT_YOUR_MODEL_HERE:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/data/gen_rec.sh:
--------------------------------------------------------------------------------
1 | #get im2rec.py at https://github.com/dmlc/mxnet/tree/master/tools
2 | python -u im2rec.py --resize 512 --quality 95 --num-thread 20 val ./
3 | python -u im2rec.py --resize 512 --quality 95 --num-thread 20 train ./
4 | 


--------------------------------------------------------------------------------
/common/find_mxnet.py:
--------------------------------------------------------------------------------
"""Make ``mxnet`` importable and provide a default cuDNN autotune setting."""
import os
import sys

# Only supply a default. An unconditional assignment here would silently
# overwrite a value the caller exported before this module was imported.
os.environ.setdefault("MXNET_CUDNN_AUTOTUNE_DEFAULT", "1")

try:
    import mxnet as mx
except ImportError:
    # Fall back to the python/ directory three levels above this file.
    curr_path = os.path.abspath(os.path.dirname(__file__))
    sys.path.append(os.path.join(curr_path, "../../../python"))
    import mxnet as mx
9 | 


--------------------------------------------------------------------------------
/run.sh:
--------------------------------------------------------------------------------
1 | export MXNET_CPU_WORKER_NTHREADS=48
2 | export MXNET_CUDNN_AUTOTUNE_DEFAULT=0
3 | python fine-tune.py --pretrained-model model/resnet-152 \
4 |     --load-epoch 0 --gpus 0,1,2,3 \
5 | 	--model-prefix model/iNat-resnet-152 \
6 | 	--data-nthreads 48 \
7 |     --batch-size 48 --num-classes 5089 --num-examples 579184
8 | 


--------------------------------------------------------------------------------
/common/util.py:
--------------------------------------------------------------------------------
 1 | import subprocess
 2 | import os
 3 | import errno
 4 | 
 5 | def download_file(url, local_fname=None, force_write=False):
 6 |     # requests is not default installed
 7 |     import requests
 8 |     if local_fname is None:
 9 |         local_fname = url.split('/')[-1]
10 |     if not force_write and os.path.exists(local_fname):
11 |         return local_fname
12 | 
13 |     dir_name = os.path.dirname(local_fname)
14 | 
15 |     if dir_name != "":
16 |         if not os.path.exists(dir_name):
17 |             try: # try to create the directory if it doesn't exists
18 |                 os.makedirs(dir_name)
19 |             except OSError as exc:
20 |                 if exc.errno != errno.EEXIST:
21 |                     raise
22 | 
23 | 
24 | 
25 |     r = requests.get(url, stream=True)
26 |     assert r.status_code == 200, "failed to open %s" % url
27 |     with open(local_fname, 'wb') as f:
28 |         for chunk in r.iter_content(chunk_size=1024):
29 |             if chunk: # filter out keep-alive new chunks
30 |                 f.write(chunk)
31 |     return local_fname
32 | 
def get_gpus():
    """Return the indices of the GPUs listed by ``nvidia-smi -L``.

    Returns
    -------
    list of int
        ``[0, 1, ..., n - 1]`` where ``n`` is the number of output lines
        containing ``GPU``. The list is empty when ``nvidia-smi`` cannot be
        started or exits with a non-zero status.
    """
    try:
        listing = subprocess.check_output(["nvidia-smi", "-L"], universal_newlines=True)
    except (OSError, subprocess.CalledProcessError):
        # Tool missing, or present but failing (e.g. no usable driver).
        return []
    gpu_lines = [line for line in listing.split('\n') if 'GPU' in line]
    # A real list on both Python 2 and 3, as the docstring promises.
    return list(range(len(gpu_lines)))
42 | 


--------------------------------------------------------------------------------
/data/mx_list.py:
--------------------------------------------------------------------------------
 1 | # iNaturalist image loader
 2 | 
 3 | 
 4 | from PIL import Image
 5 | import os
 6 | import json
 7 | import numpy as np
 8 | 
 9 | def default_loader(path):
10 |     return Image.open(path).convert('RGB')
11 | 
def gen_list(prefix):
    """Write a shuffled ``<prefix>.lst`` image list from ``<prefix>2017.json``.

    The annotation file must contain ``images`` (each with ``file_name`` and
    ``id``) and ``categories`` (each with ``id`` and ``name``). When it also
    contains ``annotations``, their ``category_id`` values are used as labels;
    otherwise every label is 0.

    Each output row is ``index<TAB>label<TAB>file_name`` where ``index``
    counts from 0 in the shuffled order.

    Parameters
    ----------
    prefix : str
        Name (or path prefix) of the data split, e.g. ``'train'``.
    """
    import pandas as pd

    ann_file = '%s2017.json' % prefix
    train_out = '%s.lst' % prefix
    # load annotations
    print('Loading annotations from: ' + os.path.basename(ann_file))
    with open(ann_file) as data_file:
        ann_data = json.load(data_file)

    # set up the filenames and annotations
    imgs = [aa['file_name'] for aa in ann_data['images']]
    im_ids = [aa['id'] for aa in ann_data['images']]
    if 'annotations' in ann_data:
        # NOTE(review): labels are paired with images by position, which
        # assumes both lists are in the same order -- confirm for new data.
        classes = [aa['category_id'] for aa in ann_data['annotations']]
    else:
        # otherwise we don't have class info, so set every label to 0
        classes = [0] * len(im_ids)

    idx_to_class = {cc['id']: cc['name'] for cc in ann_data['categories']}

    print('\t' + str(len(imgs)) + ' images')
    print('\t' + str(len(idx_to_class)) + ' classes')

    # Preview at most ten entries; a fixed range(10) raised IndexError on
    # splits with fewer than ten images.
    for index in range(min(10, len(imgs))):
        path = imgs[index]
        target = str(classes[index])
        im_id = str(im_ids[index] - 1)
        print(im_id + '\t' + target + '\t' + path)

    df = pd.DataFrame({0: classes, 1: imgs}, columns=[0, 1])
    # Shuffle the rows, then renumber them 0..n-1 so the first column of the
    # list is a running index. This replaces writing the file, reading it
    # back and writing it again just to obtain a fresh index.
    df = df.sample(frac=1).reset_index(drop=True)
    df.to_csv(train_out, sep='\t', header=False)


if __name__ == '__main__':
    set_names = ['train', 'val', 'test']
    for name in set_names:
        gen_list(name)
56 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | env/
 12 | build/
 13 | develop-eggs/
 14 | dist/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | 
 49 | # Translations
 50 | *.mo
 51 | *.pot
 52 | 
 53 | # Django stuff:
 54 | *.log
 55 | local_settings.py
 56 | 
 57 | # Flask stuff:
 58 | instance/
 59 | .webassets-cache
 60 | 
 61 | # Scrapy stuff:
 62 | .scrapy
 63 | 
 64 | # Sphinx documentation
 65 | docs/_build/
 66 | 
 67 | # PyBuilder
 68 | target/
 69 | 
 70 | # Jupyter Notebook
 71 | .ipynb_checkpoints
 72 | 
 73 | # pyenv
 74 | .python-version
 75 | 
 76 | # celery beat schedule file
 77 | celerybeat-schedule
 78 | 
 79 | # SageMath parsed files
 80 | *.sage.py
 81 | 
 82 | # dotenv
 83 | .env
 84 | 
 85 | # virtualenv
 86 | .venv
 87 | venv/
 88 | ENV/
 89 | 
 90 | # Spyder project settings
 91 | .spyderproject
 92 | .spyproject
 93 | 
 94 | # Rope project settings
 95 | .ropeproject
 96 | 
 97 | # mkdocs documentation
 98 | /site
 99 | 
100 | # mypy
101 | .mypy_cache/
102 | 


--------------------------------------------------------------------------------
/common/modelzoo.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from util import download_file
 3 | 
# Root URL that every model file address below is built from.
_base_model_url = 'http://data.mxnet.io/models/'
# Maps a model name to the URLs of its two files: 'symbol' (a JSON file)
# and 'params' (a .params file). download_model() looks names up here when
# the caller passes no meta_info of its own.
_default_model_info = {
    'imagenet1k-inception-bn': {'symbol':_base_model_url+'imagenet/inception-bn/Inception-BN-symbol.json',
                             'params':_base_model_url+'imagenet/inception-bn/Inception-BN-0126.params'},
    'imagenet1k-resnet-18': {'symbol':_base_model_url+'imagenet/resnet/18-layers/resnet-18-symbol.json',
                             'params':_base_model_url+'imagenet/resnet/18-layers/resnet-18-0000.params'},
    'imagenet1k-resnet-34': {'symbol':_base_model_url+'imagenet/resnet/34-layers/resnet-34-symbol.json',
                             'params':_base_model_url+'imagenet/resnet/34-layers/resnet-34-0000.params'},
    'imagenet1k-resnet-50': {'symbol':_base_model_url+'imagenet/resnet/50-layers/resnet-50-symbol.json',
                             'params':_base_model_url+'imagenet/resnet/50-layers/resnet-50-0000.params'},
    'imagenet1k-resnet-101': {'symbol':_base_model_url+'imagenet/resnet/101-layers/resnet-101-symbol.json',
                             'params':_base_model_url+'imagenet/resnet/101-layers/resnet-101-0000.params'},
    'imagenet1k-resnet-152': {'symbol':_base_model_url+'imagenet/resnet/152-layers/resnet-152-symbol.json',
                             'params':_base_model_url+'imagenet/resnet/152-layers/resnet-152-0000.params'},
    'imagenet1k-resnext-50': {'symbol':_base_model_url+'imagenet/resnext/50-layers/resnext-50-symbol.json',
                             'params':_base_model_url+'imagenet/resnext/50-layers/resnext-50-0000.params'},
    'imagenet1k-resnext-101': {'symbol':_base_model_url+'imagenet/resnext/101-layers/resnext-101-symbol.json',
                             'params':_base_model_url+'imagenet/resnext/101-layers/resnext-101-0000.params'},
    'imagenet11k-resnet-152': {'symbol':_base_model_url+'imagenet-11k/resnet-152/resnet-152-symbol.json',
                             'params':_base_model_url+'imagenet-11k/resnet-152/resnet-152-0000.params'},
    'imagenet11k-place365ch-resnet-152': {'symbol':_base_model_url+'imagenet-11k-place365-ch/resnet-152-symbol.json',
                                          'params':_base_model_url+'imagenet-11k-place365-ch/resnet-152-0000.params'},
    'imagenet11k-place365ch-resnet-50': {'symbol':_base_model_url+'imagenet-11k-place365-ch/resnet-50-symbol.json',
                                         'params':_base_model_url+'imagenet-11k-place365-ch/resnet-50-0000.params'},
}
29 | 
def download_model(model_name, dst_dir='./', meta_info=None):
    """Download the symbol and parameter files of a named model.

    Parameters
    ----------
    model_name : str
        Key to look up in ``meta_info``.
    dst_dir : str
        Directory the files are stored in; created (including missing
        parents) when it does not exist.
    meta_info : dict, optional
        Maps model names to ``{'symbol': url, 'params': url}``. Defaults to
        the module's built-in table.

    Returns
    -------
    tuple
        ``(prefix, 0)`` where ``prefix`` is ``os.path.join(dst_dir,
        model_name)``; the files are ``prefix-symbol.json`` and
        ``prefix-0000.params``. ``(None, 0)`` when the name is unknown.

    Raises
    ------
    AssertionError
        If the model entry lacks a ``symbol`` or ``params`` URL.
    """
    if meta_info is None:
        meta_info = _default_model_info
    meta_info = dict(meta_info)
    if model_name not in meta_info:
        return (None, 0)
    meta = dict(meta_info[model_name])
    # Check both URLs up front so an incomplete entry fails before anything
    # is created on disk or fetched over the network.
    assert 'symbol' in meta, "missing symbol url"
    assert 'params' in meta, "missing parameter file url"
    if not os.path.isdir(dst_dir):
        # makedirs (not mkdir) so nested destination paths work as well.
        os.makedirs(dst_dir)
    model_name = os.path.join(dst_dir, model_name)
    download_file(meta['symbol'], model_name+'-symbol.json')
    download_file(meta['params'], model_name+'-0000.params')
    return (model_name, 0)
45 | 


--------------------------------------------------------------------------------
/fine-tune.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import argparse
 3 | import logging
 4 | logging.basicConfig(level=logging.DEBUG)
 5 | from common import find_mxnet
 6 | from common import data, fit, modelzoo
 7 | import mxnet as mx
 8 | 
 9 | import os, urllib
def download(url):
    """Fetch ``url`` into the ``model/`` directory unless it is already there.

    The local name is the last ``/``-separated component of ``url``.
    """
    filename = url.split("/")[-1]
    target = 'model/' + filename
    if os.path.exists(target):
        return
    if not os.path.isdir('model'):
        # urlretrieve cannot write into a directory that does not exist.
        os.makedirs('model')
    # urlretrieve lives in urllib.request on Python 3 and in urllib on
    # Python 2; support both.
    try:
        from urllib.request import urlretrieve
    except ImportError:
        from urllib import urlretrieve
    urlretrieve(url, target)


def get_model(prefix, epoch):
    """Download ``<prefix>-symbol.json`` and ``<prefix>-<epoch:04d>.params``."""
    download(prefix+'-symbol.json')
    download(prefix+'-%04d.params' % (epoch,))
18 | 
def get_fine_tune_model(symbol, arg_params, num_classes, layer_name):
    """Replace the classifier head of a pre-trained network.

    symbol: the pre-trained network symbol
    arg_params: the argument parameters of the pre-trained model
    num_classes: the number of classes for the fine-tune datasets
    layer_name: the layer name before the last fully-connected layer

    Returns ``(net, new_args)``: the network cut at ``layer_name`` with a new
    fully-connected layer named ``fc`` and a softmax output on top, and the
    entries of ``arg_params`` whose names do not contain ``'fc'``.
    """
    # Use the ``symbol`` argument. Reading a module-level ``sym`` here only
    # worked when the caller happened to define a global of that name.
    all_layers = symbol.get_internals()
    net = all_layers[layer_name+'_output']
    net = mx.symbol.FullyConnected(data=net, num_hidden=num_classes, name='fc')
    net = mx.symbol.SoftmaxOutput(data=net, name='softmax')
    # Drop every parameter whose name contains 'fc' so the new layer is not
    # given weights from the old classifier.
    new_args = {k: arg_params[k] for k in arg_params if 'fc' not in k}
    return (net, new_args)
32 | 
33 | 
if __name__ == "__main__":
    # parse args
    parser = argparse.ArgumentParser(description="fine-tune a dataset",
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    train = fit.add_fit_args(parser)
    data.add_data_args(parser)
    aug = data.add_data_aug_args(parser)
    parser.add_argument('--pretrained-model', type=str,
                        help='the pre-trained model')
    parser.add_argument('--layer-before-fullc', type=str, default='flatten0',
                        help='the name of the layer before the last fullc layer')
    # use less augmentations for fine-tune
    data.set_data_aug_level(parser, 1)
    # use a small learning rate and less regularizations
    # when training comes to 10th and 20th epoch
    # see http://mxnet.io/how_to/finetune.html and Mu's thesis
    # http://www.cs.cmu.edu/~muli/file/mu-thesis.pdf
    parser.set_defaults(image_shape='3,320,320', num_epochs=30,
                        lr=.01, lr_step_epochs='10,20', wd=0, mom=0)

    args = parser.parse_args()

    # load pretrained model
    dir_path = os.path.dirname(os.path.realpath(__file__))

    # get the pretrained resnet 152 from official MXNet model zoo
    # 1k imagenet pretrained
    #get_model('http://data.mxnet.io/models/imagenet/resnet/152-layers/resnet-152', 0)
    # 11k imagenet resnet 152 has stronger classification power
    # (this call was indented one column too far, which is an IndentationError)
    get_model('http://data.mxnet.io/models/imagenet-11k/resnet-152/resnet-152', 0)
    # NOTE: the checkpoint prefix is fixed here; --pretrained-model is parsed
    # but not read in this block.
    prefix = 'model/resnet-152'
    epoch = 0
    sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)

    # remove the last fullc layer
    (new_sym, new_args) = get_fine_tune_model(
        sym, arg_params, args.num_classes, args.layer_before_fullc)

    # train
    fit.fit(args        = args,
            network     = new_sym,
            data_loader = data.get_rec_iter,
            arg_params  = new_args,
            aux_params  = aux_params)
79 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # iNaturalist
 2 | MXNet fine-tune baseline script (resnet 152 layers) for iNaturalist Challenge at FGVC 2017, public LB score 0.117 from a single 21st epoch submission without ensemble.
 3 | 
 4 | ## How to use
 5 | 
 6 | ### Install MXNet 
 7 | 
 8 | Run `pip install mxnet-cu80` after installing CUDA driver or go to <https://github.com/dmlc/mxnet/> for the latest version from Github.
 9 | 
10 | Windows users? no CUDA 8.0? no GPU? Please run `pip search mxnet` and find the good package for your platform.
11 | 
12 | ### Generate lists
13 | 
14 | After downloading and unzipping the train and test set in to `data`, along with the necessary `.json` annotation files, run `python mx_list.py` under `data` and generate `train.lst` `val.lst` `test.lst`
15 | 
16 | ### Generate rec files
17 | 
18 | A good way to speed up training is to maximize the IO throughput by using the `.rec` format, which also provides the convenience of data augmentation. In the `data/` directory, `gen_rec.sh` can generate `train.rec` and `val.rec` for the training and validation datasets, and `im2rec.py` can be obtained from the MXNet repo <https://github.com/dmlc/mxnet/tree/master/tools> . One can lower the `--quality 95` parameter to save disk space, at the risk of losing training precision.
19 | 
20 | ### Train
21 | 
22 | Run `sh run.sh` which looks like (a 4 GTX 1080 machine for example):
23 | 
24 | ```
25 | python fine-tune.py --pretrained-model model/resnet-152 \
26 |     --load-epoch 0 --gpus 0,1,2,3 \
27 |     --model-prefix model/iNat-resnet-152 \
28 | 	--data-nthreads 48 \
29 |     --batch-size 48 --num-classes 5089 --num-examples 579184
30 | ```
31 | 
32 | please adjust `--gpus` and `--batch-size` according to the machine configuration. A sample calculation: `batch-size = 12` can use 8 GB memory on a GTX 1080, so `--batch-size 48` is good for a 4-GPU machine.
33 | 
34 | Please make sure the machine has an internet connection for the first run, because the script needs to download the pretrained model from <http://data.mxnet.io/models/imagenet-11k/resnet-152/>. If the machine has no internet connection, please download the corresponding model files on another machine and copy them to the `model/` directory.
35 | 
36 | ### Generate submission file
37 | 
38 | After a long run of some epochs, e.g. 30 epochs, we can select some epochs for the submission file. Run `sub.py`, which takes two parameters, `num of epoch` and `gpu id`, like:
39 | 
40 | ```
41 | python sub.py 21 0
42 | ```
43 | 
44 | selects the 21st epoch and infer on GPU `#0`. One can merge multiple epoch results on different GPUs and ensemble for a good submission file.
45 | 
46 | ## How 'fine-tune' works
47 | 
48 | Fine-tune method starts with loading a pretrained ResNet 152 layers (Imagenet 11k classes) from MXNet model zoo, where the model has gained some prediction power, and applies the new data by learning from provided data. 
49 | 
50 | The key technique comes from `lr_step_epochs`, where we assign a smaller learning rate and less regularization when training reaches certain epochs. In this example, we give `lr_step_epochs='10,20'`, which means the learning rate is reduced when training reaches the 10th and 20th epochs, so the fine-tune procedure can converge the network and learn from the provided new samples. A similar idea is applied to data augmentation, where fine-tuning is given less augmentation. This technique is described in Mu's thesis <http://www.cs.cmu.edu/~muli/file/mu-thesis.pdf> 
51 | 
52 | This pipeline is not limited to the ResNet-152 pretrained model. Please experiment with the fine-tune method using other models, like ResNet 101 or Inception, from MXNet's model zoo <http://data.mxnet.io/models/> by following this tutorial <http://mxnet.io/how_to/finetune.html> and this sample code <https://github.com/dmlc/mxnet/blob/master/example/image-classification/fine-tune.py> . Please feel free to submit issues and/or pull requests and/or discuss on the Kaggle forum if you have better results.
53 | 
54 | ## Reference
55 | 
56 | * MXNet's model zoo <http://data.mxnet.io/models/>
57 | * MXNet fine tune <http://mxnet.io/how_to/finetune.html> <https://github.com/dmlc/mxnet/blob/master/example/image-classification/fine-tune.py>
58 | * Mu Li's thesis <http://www.cs.cmu.edu/~muli/file/mu-thesis.pdf> 
59 | * iNaturalist Challenge at FGVC 2017 <https://www.kaggle.com/c/inaturalist-challenge-at-fgvc-2017/>


--------------------------------------------------------------------------------
/common/data.py:
--------------------------------------------------------------------------------
  1 | import mxnet as mx
  2 | import random
  3 | from mxnet.io import DataBatch, DataIter
  4 | import numpy as np
  5 | 
  6 | def add_data_args(parser):
  7 |     data = parser.add_argument_group('Data', 'the input images')
  8 |     #data.add_argument('--data-train', type=str, help='the training data')
  9 |     #data.add_argument('--data-val', type=str, help='the validation data')
 10 |     data.add_argument('--rgb-mean', type=str, default='123.68,116.779,103.939',
 11 |                       help='a tuple of size 3 for the mean rgb')
 12 |     data.add_argument('--pad-size', type=int, default=0,
 13 |                       help='padding the input image')
 14 |     data.add_argument('--image-shape', type=str,
 15 |                       help='the image shape feed into the network, e.g. (3,224,224)')
 16 |     data.add_argument('--num-classes', type=int, help='the number of classes')
 17 |     data.add_argument('--num-examples', type=int, help='the number of training examples')
 18 |     data.add_argument('--data-nthreads', type=int, default=4,
 19 |                       help='number of threads for data decoding')
 20 |     data.add_argument('--benchmark', type=int, default=0,
 21 |                       help='if 1, then feed the network with synthetic data')
 22 |     data.add_argument('--dtype', type=str, default='float32',
 23 |                       help='data type: float32 or float16')
 24 |     return data
 25 | 
 26 | def add_data_aug_args(parser):
 27 |     aug = parser.add_argument_group(
 28 |         'Image augmentations', 'implemented in src/io/image_aug_default.cc')
 29 |     aug.add_argument('--random-crop', type=int, default=1,
 30 |                      help='if or not randomly crop the image')
 31 |     aug.add_argument('--random-mirror', type=int, default=1,
 32 |                      help='if or not randomly flip horizontally')
 33 |     aug.add_argument('--max-random-h', type=int, default=0,
 34 |                      help='max change of hue, whose range is [0, 180]')
 35 |     aug.add_argument('--max-random-s', type=int, default=0,
 36 |                      help='max change of saturation, whose range is [0, 255]')
 37 |     aug.add_argument('--max-random-l', type=int, default=0,
 38 |                      help='max change of intensity, whose range is [0, 255]')
 39 |     aug.add_argument('--max-random-aspect-ratio', type=float, default=0,
 40 |                      help='max change of aspect ratio, whose range is [0, 1]')
 41 |     aug.add_argument('--max-random-rotate-angle', type=int, default=0,
 42 |                      help='max angle to rotate, whose range is [0, 360]')
 43 |     aug.add_argument('--max-random-shear-ratio', type=float, default=0,
 44 |                      help='max ratio to shear, whose range is [0, 1]')
 45 |     aug.add_argument('--max-random-scale', type=float, default=1,
 46 |                      help='max ratio to scale')
 47 |     aug.add_argument('--min-random-scale', type=float, default=1,
 48 |                      help='min ratio to scale, should >= img_size/input_shape. otherwise use --pad-size')
 49 |     return aug
 50 | 
 51 | def set_data_aug_level(aug, level):
 52 |     if level >= 1:
 53 |         aug.set_defaults(random_crop=1, random_mirror=1)
 54 |     if level >= 2:
 55 |         aug.set_defaults(max_random_h=36, max_random_s=50, max_random_l=50)
 56 |     if level >= 3:
 57 |         aug.set_defaults(max_random_rotate_angle=10, max_random_shear_ratio=0.1, max_random_aspect_ratio=0.25)
 58 | 
 59 | 
 60 | class SyntheticDataIter(DataIter):
 61 |     def __init__(self, num_classes, data_shape, max_iter, dtype):
 62 |         self.batch_size = data_shape[0]
 63 |         self.cur_iter = 0
 64 |         self.max_iter = max_iter
 65 |         self.dtype = dtype
 66 |         label = np.random.randint(0, num_classes, [self.batch_size,])
 67 |         data = np.random.uniform(-1, 1, data_shape)
 68 |         self.data = mx.nd.array(data, dtype=self.dtype)
 69 |         self.label = mx.nd.array(label, dtype=self.dtype)
 70 |     def __iter__(self):
 71 |         return self
 72 |     @property
 73 |     def provide_data(self):
 74 |         return [mx.io.DataDesc('data', self.data.shape, self.dtype)]
 75 |     @property
 76 |     def provide_label(self):
 77 |         return [mx.io.DataDesc('softmax_label', (self.batch_size,), self.dtype)]
 78 |     def next(self):
 79 |         self.cur_iter += 1
 80 |         if self.cur_iter <= self.max_iter:
 81 |             return DataBatch(data=(self.data,),
 82 |                              label=(self.label,),
 83 |                              pad=0,
 84 |                              index=None,
 85 |                              provide_data=self.provide_data,
 86 |                              provide_label=self.provide_label)
 87 |         else:
 88 |             raise StopIteration
 89 |     def __next__(self):
 90 |         return self.next()
 91 |     def reset(self):
 92 |         self.cur_iter = 0
 93 | 
 94 | def get_rec_iter(args, kv=None):
 95 |     image_shape = tuple([int(l) for l in args.image_shape.split(',')])
 96 |     dtype = np.float32;
 97 |     if 'dtype' in args:
 98 |         if args.dtype == 'float16':
 99 |             dtype = np.float16
100 |     if 'benchmark' in args and args.benchmark:
101 |         data_shape = (args.batch_size,) + image_shape
102 |         train = SyntheticDataIter(args.num_classes, data_shape, 50, dtype)
103 |         return (train, None)
104 |     if kv:
105 |         (rank, nworker) = (kv.rank, kv.num_workers)
106 |     else:
107 |         (rank, nworker) = (0, 1)
108 |     rgb_mean = [float(i) for i in args.rgb_mean.split(',')]
109 |     train = mx.img.ImageIter(
110 |         label_width         = 1,
111 | 	path_root	    = 'data/', 
112 |         #path_imglist         = args.data_train,
113 | 		path_imgrec      = 'data/train.rec',
114 | 		path_imgidx     = 'data/train.idx',
115 |         data_shape          = (3, 320, 320),
116 |         batch_size          = args.batch_size,
117 |         rand_crop           = True,
118 |         rand_resize         = True,
119 |         rand_mirror         = True,
120 |         shuffle             = True,
121 |         brightness          = 0.4,
122 |         contrast            = 0.4,
123 |         saturation          = 0.4,
124 |         pca_noise           = 0.1,
125 |         num_parts           = nworker,
126 |         part_index          = rank)
127 |     #if args.data_val is None:
128 |     #    return (train, None)
129 |     val = mx.img.ImageIter(
130 |         label_width         = 1,
131 | 	path_root	    = 'data/', 
132 |         #path_imglist         = args.data_val,
133 | 		path_imgrec      = 'data/val.rec',
134 | 		path_imgidx     = 'data/val.idx',
135 |         batch_size          = args.batch_size,
136 |         data_shape          =  (3, 320, 320),
137 |         resize		    = 360, 
138 |         rand_crop           = False,
139 |         rand_resize         = False,
140 |         rand_mirror         = False,
141 |         num_parts           = nworker,
142 |         part_index          = rank)
143 |     return (train, val)
144 | 


--------------------------------------------------------------------------------
/sub.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | os.environ['MXNET_CUDNN_AUTOTUNE_DEFAULT'] = '0'
  3 | import sys
  4 | import numpy as np
  5 | import cv2
  6 | import json
  7 | from common import find_mxnet
  8 | import mxnet as mx
  9 | 
 10 | def ch_dev(arg_params, aux_params, ctx):
 11 |     new_args = dict()
 12 |     new_auxs = dict()
 13 |     for k, v in arg_params.items():
 14 |         new_args[k] = v.as_in_context(ctx)
 15 |     for k, v in aux_params.items():
 16 |         new_auxs[k] = v.as_in_context(ctx)
 17 |     return new_args, new_auxs
 18 | 
 19 | def oversample(images, crop_dims):
 20 | 
 21 |     im_shape = np.array(images.shape)
 22 |     crop_dims = np.array(crop_dims)
 23 |     im_center = im_shape[:2] / 2.0
 24 | 
 25 |     h_indices = (0, im_shape[0] - crop_dims[0])
 26 |     w_indices = (0, im_shape[1] - crop_dims[1])
 27 |     crops_ix = np.empty((5, 4), dtype=int)
 28 |     curr = 0
 29 |     for i in h_indices:
 30 |         for j in w_indices:
 31 |             crops_ix[curr] = (i, j, i + crop_dims[0], j + crop_dims[1])
 32 |             curr += 1
 33 |     crops_ix[4] = np.tile(im_center, (1, 2)) + np.concatenate([
 34 |         -crop_dims / 2.0,
 35 |          crop_dims / 2.0
 36 |     ])
 37 |     crops_ix = np.tile(crops_ix, (2, 1))
 38 | 
 39 |     # print crops_ix
 40 | 
 41 |     # Extract crops
 42 |     crops = np.empty((10, crop_dims[0], crop_dims[1],
 43 |                       im_shape[-1]), dtype=np.float32)
 44 |     ix = 0
 45 |     # for im in images:
 46 |     im = images
 47 |     # print im.shape
 48 |     for crop in crops_ix:
 49 |         # print crop
 50 |         crops[ix] = im[crop[0]:crop[2], crop[1]:crop[3], :]
 51 |         # cv2.imshow('crop', im[crop[0]:crop[2], crop[1]:crop[3], :])
 52 |         # cv2.waitKey()
 53 |         ix += 1
 54 |     crops[ix-5:ix] = crops[ix-5:ix, :, ::-1, :]
 55 |     # cv2.imshow('crop', crops[0,:,:,:])
 56 |     # cv2.waitKey()
 57 |     return crops
 58 | 
 59 | prefix = 'model/iNat-resnet-152'
 60 | epoch = int(sys.argv[1]) #check point step
 61 | gpu_id = int(sys.argv[2]) #GPU ID for infer
 62 | ctx = mx.gpu(gpu_id)
 63 | sym, arg_params, aux_params = mx.model.load_checkpoint(prefix, epoch)
 64 | arg_params, aux_params = ch_dev(arg_params, aux_params, ctx)
 65 | 
 66 | 
 67 | ann_file = 'data/test2017.json'
 68 | print('Loading annotations from: ' + os.path.basename(ann_file))
 69 | with open(ann_file) as data_file:
 70 |     ann_data = json.load(data_file)
 71 | 
 72 | imgs = [aa['file_name'] for aa in ann_data['images']]
 73 | im_ids = [aa['id'] for aa in ann_data['images']]
 74 | if 'annotations' in ann_data.keys():
 75 |     # if we have class labels
 76 |     classes = [aa['category_id'] for aa in ann_data['annotations']]
 77 | else:
 78 |     # otherwise dont have class info so set to 0
 79 |     classes = [0]*len(im_ids)
 80 | 
 81 | idx_to_class = {cc['id']: cc['name'] for cc in ann_data['categories']}
 82 | 
 83 | 
 84 | 
 85 | top1_acc = 0
 86 | top5_acc = 0
 87 | cnt = 0
 88 | img_sz = 360
 89 | crop_sz = 320
 90 | 
 91 | preds = []
 92 | im_idxs = []
 93 | batch_sz = 256
 94 | input_blob = np.zeros((batch_sz,3,crop_sz,crop_sz))
 95 | idx = 0
 96 | num_batches = int(len(imgs) / batch_sz)
 97 | 
 98 | for batch_head in range(0, batch_sz*num_batches, batch_sz):
 99 |     #print batch_head
100 |     for index in range(batch_head, batch_head+batch_sz):
101 | 	img_name = imgs[index]
102 |         label = str(classes[index])
103 |         im_id = str(im_ids[index])
104 |         im_idxs.append(int(im_id))
105 |         cnt += 1
106 |         img_full_name = 'data/test2017/' + img_name
107 |         img = cv2.cvtColor(cv2.imread(img_full_name), cv2.COLOR_BGR2RGB)
108 |         img = np.float32(img)
109 | 
110 |         rows, cols = img.shape[:2]
111 |         if cols < rows:
112 |             resize_width = img_sz
113 |             resize_height = resize_width * rows / cols;
114 |         else:
115 |             resize_height = img_sz
116 |             resize_width = resize_height * cols / rows;
117 | 
118 |         img = cv2.resize(img, (resize_width, resize_height), interpolation=cv2.INTER_CUBIC)
119 | 
120 | 	h, w, _ = img.shape
121 | 
122 |         x0 = int((w - crop_sz) / 2)
123 |         y0 = int((h - crop_sz) / 2)
124 |         img = img[y0:y0+crop_sz, x0:x0+crop_sz]
125 | 
126 |         img = np.swapaxes(img, 0, 2)
127 |         img = np.swapaxes(img, 1, 2)  # change to r,g,b order
128 |         input_blob[idx,:,:,:] = img
129 |         idx += 1
130 | 	#print(idx)
131 | 
132 |     idx = 0
133 | 
134 | 
135 |     arg_params["data"] = mx.nd.array(input_blob, ctx)
136 |     arg_params["softmax_label"] = mx.nd.empty((batch_sz,), ctx)
137 |     exe = sym.bind(ctx, arg_params ,args_grad=None, grad_req="null", aux_states=aux_params)
138 |     exe.forward(is_train=False)
139 |     net_out = exe.outputs[0].asnumpy()
140 | 
141 |     input_blob = np.zeros((batch_sz,3,crop_sz,crop_sz))
142 | 
143 |     for bz in range(batch_sz):
144 | 	probs = net_out[bz,:]
145 |     	score = np.squeeze(probs)
146 | 
147 |         sort_index = np.argsort(score)[::-1]
148 |         top_k = sort_index[0:5]
149 |         preds.append(top_k.astype(np.int))
150 | 	print(preds[-1], batch_head+bz)
151 | 
152 | 
153 | 
# Score the images left over after the full batches, one image per forward pass.
for index in range(batch_sz*num_batches, len(imgs)):
    img_name = imgs[index]
    label = str(classes[index])
    im_id = str(im_ids[index])
    im_idxs.append(int(im_id))
    cnt += 1
    img_full_name = 'data/test2017/' + img_name

    # cv2.imread returns None instead of raising when the file is missing or
    # unreadable; fail with the offending path rather than an opaque cv2 error.
    raw_img = cv2.imread(img_full_name)
    if raw_img is None:
        raise IOError('cannot read image: ' + img_full_name)
    img = np.float32(cv2.cvtColor(raw_img, cv2.COLOR_BGR2RGB))

    # Resize so the shorter side equals img_sz while keeping the aspect ratio.
    # Floor division keeps the target size integral (cv2.resize rejects floats).
    rows, cols = img.shape[:2]
    if cols < rows:
        resize_width = img_sz
        resize_height = resize_width * rows // cols
    else:
        resize_height = img_sz
        resize_width = resize_height * cols // rows

    img = cv2.resize(img, (int(resize_width), int(resize_height)),
                     interpolation=cv2.INTER_CUBIC)

    # Center crop of crop_sz x crop_sz.
    h, w, _ = img.shape
    x0 = int((w - crop_sz) / 2)
    y0 = int((h - crop_sz) / 2)
    img = img[y0:y0+crop_sz, x0:x0+crop_sz]

    # (height, width, channel) -> (channel, height, width), then add a
    # leading batch axis of size 1.
    img = np.transpose(img, (2, 0, 1))
    img = img[np.newaxis, :]

    arg_params["data"] = mx.nd.array(img, ctx)
    arg_params["softmax_label"] = mx.nd.empty((1,), ctx)
    exe = sym.bind(ctx, arg_params, args_grad=None, grad_req="null", aux_states=aux_params)
    exe.forward(is_train=False)
    probs = exe.outputs[0].asnumpy()
    score = np.squeeze(probs.mean(axis=0))

    # Indices of the five highest scores, best first.
    sort_index = np.argsort(score)[::-1]
    top_k = sort_index[0:5]

    # np.int was only an alias of the builtin int and has been removed from NumPy.
    preds.append(top_k.astype(int))
    print(preds[-1], im_idxs[-1])
202 | 
# Collapse the accumulated per-image ids and top-5 predictions into arrays.
im_idxs = np.hstack(im_idxs)
preds = np.vstack(preds)


# Write the submission file: a header line, then one "id,<labels separated by
# spaces>" row per image.
with open("submission_epoch_%d.csv"%(epoch), 'w') as opfile:
    opfile.write('id,predicted\n')
    for im_idx, pred_row in zip(im_idxs, preds):
        opfile.write(str(im_idx) + ',' + ' '.join(str(x) for x in pred_row) + '\n')
211 | 
212 | 
213 | 
214 | 
215 | 


--------------------------------------------------------------------------------
/common/fit.py:
--------------------------------------------------------------------------------
  1 | import mxnet as mx
  2 | import logging
  3 | import os
  4 | import time
  5 | 
  6 | def _get_lr_scheduler(args, kv):
  7 |     if 'lr_factor' not in args or args.lr_factor >= 1:
  8 |         return (args.lr, None)
  9 |     epoch_size = args.num_examples / args.batch_size
 10 |     if 'dist' in args.kv_store:
 11 |         epoch_size /= kv.num_workers
 12 |     begin_epoch = args.load_epoch if args.load_epoch else 0
 13 |     step_epochs = [int(l) for l in args.lr_step_epochs.split(',')]
 14 |     lr = args.lr
 15 |     for s in step_epochs:
 16 |         if begin_epoch >= s:
 17 |             lr *= args.lr_factor
 18 |     if lr != args.lr:
 19 |         logging.info('Adjust learning rate to %e for epoch %d' %(lr, begin_epoch))
 20 | 
 21 |     steps = [epoch_size * (x-begin_epoch) for x in step_epochs if x-begin_epoch > 0]
 22 |     return (lr, mx.lr_scheduler.MultiFactorScheduler(step=steps, factor=args.lr_factor))
 23 | 
 24 | def _load_model(args, rank=0):
 25 |     if 'load_epoch' not in args or args.load_epoch is None:
 26 |         return (None, None, None)
 27 |     assert args.model_prefix is not None
 28 |     model_prefix = args.model_prefix
 29 |     if rank > 0 and os.path.exists("%s-%d-symbol.json" % (model_prefix, rank)):
 30 |         model_prefix += "-%d" % (rank)
 31 |     sym, arg_params, aux_params = mx.model.load_checkpoint(
 32 |         model_prefix, args.load_epoch)
 33 |     logging.info('Loaded model %s_%04d.params', model_prefix, args.load_epoch)
 34 |     return (sym, arg_params, aux_params)
 35 | 
 36 | def _save_model(args, rank=0):
 37 |     if args.model_prefix is None:
 38 |         return None
 39 |     dst_dir = os.path.dirname(args.model_prefix)
 40 |     if not os.path.isdir(dst_dir):
 41 |         os.mkdir(dst_dir)
 42 |     return mx.callback.do_checkpoint(args.model_prefix if rank == 0 else "%s-%d" % (
 43 |         args.model_prefix, rank))
 44 | 
 45 | def add_fit_args(parser):
 46 |     """
 47 |     parser : argparse.ArgumentParser
 48 |     return a parser added with args required by fit
 49 |     """
 50 |     train = parser.add_argument_group('Training', 'model training')
 51 |     train.add_argument('--network', type=str,
 52 |                        help='the neural network to use')
 53 |     train.add_argument('--num-layers', type=int,
 54 |                        help='number of layers in the neural network, required by some networks such as resnet')
 55 |     train.add_argument('--gpus', type=str,
 56 |                        help='list of gpus to run, e.g. 0 or 0,2,5. empty means using cpu')
 57 |     train.add_argument('--kv-store', type=str, default='device',
 58 |                        help='key-value store type')
 59 |     train.add_argument('--num-epochs', type=int, default=100,
 60 |                        help='max num of epochs')
 61 |     train.add_argument('--lr', type=float, default=0.1,
 62 |                        help='initial learning rate')
 63 |     train.add_argument('--lr-factor', type=float, default=0.1,
 64 |                        help='the ratio to reduce lr on each step')
 65 |     train.add_argument('--lr-step-epochs', type=str,
 66 |                        help='the epochs to reduce the lr, e.g. 30,60')
 67 |     train.add_argument('--optimizer', type=str, default='sgd',
 68 |                        help='the optimizer type')
 69 |     train.add_argument('--mom', type=float, default=0.9,
 70 |                        help='momentum for sgd')
 71 |     train.add_argument('--wd', type=float, default=0.0001,
 72 |                        help='weight decay for sgd')
 73 |     train.add_argument('--batch-size', type=int, default=128,
 74 |                        help='the batch size')
 75 |     train.add_argument('--disp-batches', type=int, default=20,
 76 |                        help='show progress for every n batches')
 77 |     train.add_argument('--model-prefix', type=str,
 78 |                        help='model prefix')
 79 |     parser.add_argument('--monitor', dest='monitor', type=int, default=0,
 80 |                         help='log network parameters every N iters if larger than 0')
 81 |     train.add_argument('--load-epoch', type=int,
 82 |                        help='load the model on an epoch using the model-load-prefix')
 83 |     train.add_argument('--top-k', type=int, default=5,
 84 |                        help='report the top-k accuracy. 0 means no report.')
 85 |     train.add_argument('--test-io', type=int, default=0,
 86 |                        help='1 means test reading speed without training')
 87 |     return train
 88 | 
 89 | def fit(args, network, data_loader, **kwargs):
 90 |     """
 91 |     train a model
 92 |     args : argparse returns
 93 |     network : the symbol definition of the nerual network
 94 |     data_loader : function that returns the train and val data iterators
 95 |     """
 96 |     # kvstore
 97 |     kv = mx.kvstore.create(args.kv_store)
 98 | 
 99 |     # logging
100 |     head = '%(asctime)-15s Node[' + str(kv.rank) + '] %(message)s'
101 |     logging.basicConfig(level=logging.DEBUG, format=head)
102 |     logging.info('start with arguments %s', args)
103 | 
104 |     # data iterators
105 |     (train, val) = data_loader(args, kv)
106 |     if args.test_io:
107 |         tic = time.time()
108 |         for i, batch in enumerate(train):
109 |             for j in batch.data:
110 |                 j.wait_to_read()
111 |             if (i+1) % args.disp_batches == 0:
112 |                 logging.info('Batch [%d]\tSpeed: %.2f samples/sec' % (
113 |                     i, args.disp_batches*args.batch_size/(time.time()-tic)))
114 |                 tic = time.time()
115 | 
116 |         return
117 | 
118 | 
119 |     # load model
120 |     if 'arg_params' in kwargs and 'aux_params' in kwargs:
121 |         arg_params = kwargs['arg_params']
122 |         aux_params = kwargs['aux_params']
123 |     else:
124 |         sym, arg_params, aux_params = _load_model(args, kv.rank)
125 |         if sym is not None:
126 |             assert sym.tojson() == network.tojson()
127 | 
128 |     # save model
129 |     checkpoint = _save_model(args, kv.rank)
130 | 
131 |     # devices for training
132 |     devs = mx.cpu() if args.gpus is None or args.gpus is '' else [
133 |         mx.gpu(int(i)) for i in args.gpus.split(',')]
134 | 
135 |     # learning rate
136 |     lr, lr_scheduler = _get_lr_scheduler(args, kv)
137 | 
138 |     # create model
139 |     model = mx.mod.Module(
140 |         context       = devs,
141 |         symbol        = network
142 |     )
143 | 
144 |     lr_scheduler  = lr_scheduler
145 |     optimizer_params = {
146 |             'learning_rate': lr,
147 |             'momentum' : args.mom,
148 |             'wd' : args.wd,
149 |             'lr_scheduler': lr_scheduler}
150 | 
151 |     monitor = mx.mon.Monitor(args.monitor, pattern=".*") if args.monitor > 0 else None
152 | 
153 |     if args.network == 'alexnet':
154 |         # AlexNet will not converge using Xavier
155 |         initializer = mx.init.Normal()
156 |     else:
157 |         initializer = mx.init.Xavier(
158 |             rnd_type='gaussian', factor_type="in", magnitude=2)
159 |     # initializer   = mx.init.Xavier(factor_type="in", magnitude=2.34),
160 | 
161 |     # evaluation metrices
162 |     eval_metrics = ['accuracy']
163 |     if args.top_k > 0:
164 |         eval_metrics.append(mx.metric.create('top_k_accuracy', top_k=args.top_k))
165 | 
166 |     # callbacks that run after each batch
167 |     batch_end_callbacks = [mx.callback.Speedometer(args.batch_size, args.disp_batches)]
168 |     if 'batch_end_callback' in kwargs:
169 |         cbs = kwargs['batch_end_callback']
170 |         batch_end_callbacks += cbs if isinstance(cbs, list) else [cbs]
171 | 
172 |     # run
173 |     model.fit(train,
174 |         begin_epoch        = args.load_epoch if args.load_epoch else 0,
175 |         num_epoch          = args.num_epochs,
176 |         eval_data          = val,
177 |         eval_metric        = eval_metrics,
178 |         kvstore            = kv,
179 |         optimizer          = args.optimizer,
180 |         optimizer_params   = optimizer_params,
181 |         initializer        = initializer,
182 |         arg_params         = arg_params,
183 |         aux_params         = aux_params,
184 |         batch_end_callback = batch_end_callbacks,
185 |         epoch_end_callback = checkpoint,
186 |         allow_missing      = True,
187 |         monitor            = monitor)
188 | 


--------------------------------------------------------------------------------