├── RL ├── find_mxnet.py ├── run.sh ├── symbol_alexnet.py ├── batch_provider_mars.py ├── replay_memory.py ├── batch_provider.py ├── utils.py ├── tb_system.py ├── base_module.py ├── agent.py ├── segment_tree.py ├── rnn_models.py ├── symbol_inception-bn.py ├── mars_test.py ├── img_lib.py ├── mars_test_baseline.py ├── symbols.py ├── find_eg.py ├── dqn_mars.py └── dqn.py ├── baseline ├── find_mxnet.py ├── run.sh ├── symbol_alexnet.py ├── verifi_iterator.py ├── utils.py ├── preprocess_mars_image.py ├── extract.py ├── preprocess_ilds_image.py ├── preprocess_prid_image.py ├── symbol_inception-bn.py ├── even_iterator.py ├── baseline_test.py ├── calc_cmc.py ├── baseline.py ├── lsoftmax.py └── loss_layers.py └── README.md /RL/find_mxnet.py: -------------------------------------------------------------------------------- 1 | try: 2 | import mxnet as mx 3 | except ImportError: 4 | import os, sys 5 | #curr_path = os.path.abspath(os.path.dirname(__file__)) 6 | #sys.path.append(os.path.join(curr_path, "../../python")) 7 | sys.path.append('/home/tina/reid/mxnet/python') 8 | import mxnet as mx 9 | -------------------------------------------------------------------------------- /baseline/find_mxnet.py: -------------------------------------------------------------------------------- 1 | try: 2 | import mxnet as mx 3 | except ImportError: 4 | import os, sys 5 | #curr_path = os.path.abspath(os.path.dirname(__file__)) 6 | #sys.path.append(os.path.join(curr_path, "../../python")) 7 | sys.path.append('/home/tina/reid/mxnet/python') 8 | import mxnet as mx 9 | -------------------------------------------------------------------------------- /RL/run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | sets=0 4 | gpus=$1 5 | up=$2 6 | data_dir=$5 7 | case $3 in 8 | iLiDS-VID) 9 | main=dqn.py 10 | base=ilds_$4_$sets 11 | num_id=150 12 | train_set=image_valid$sets 13 | valid_set=image_test$sets 14 | ;; 15 | PRID-2011) 16 | main=dqn.py 17 | base=prid_$4_$sets 18 | num_id=100 19 | train_set=image_valid$sets 20 | valid_set=image_test$sets 21 | ;; 22 | MARS) 23 | main=dqn_mars.py 24 | base=mars_$4 25 | num_id=624 26 | train_set=image_valid 27 | valid_set=image_test 28 | ;; 29 | *) 30 | echo "No valid dataset" 31 | exit 32 | ;; 33 | esac 34 | 35 | bs=8 36 | ss=8 37 | ms=$ss 38 | lr=1e-4 39 | epochs=100 40 | ts=$(date "+%Y.%m.%d-%H.%M.%S") 41 | qg=0.9 42 | nh=128 43 | ns=32 44 | mode=DQN_test-$sets-$ts-bs$bs-ss$ss-$4 45 | 46 | python $main --gpus $gpus --data-dir $data_dir \ 47 | --num-examples 100000 --num-id $num_id \ 48 | --train-set $train_set --valid-set $valid_set \ 49 | --sample-size $ss --batch-size $bs \ 50 | --lr $lr --num-epoches $epochs --mode $3-TEST-$mode \ 51 | --model-load-epoch 1 --model-load-prefix $base --q-gamma $qg \ 52 | --penalty $up --num_hidden $nh --num_sim $ns \ 53 | --min-states $ms --optimizer sgd \ 54 | --epsilon --e2e --fusion -------------------------------------------------------------------------------- /baseline/run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | bs=8 4 | epochs=1 5 | sets=0 6 | gpus=$1 7 | data_dir=$4 8 | case $2 in 9 | iLiDS-VID) 10 | base=ilds_$3_$sets 11 | num_id=150 12 | train_set=image_valid$sets 13 | valid_set=image_test$sets 14 | ;; 15 | PRID-2011) 16 | base=prid_$3_$sets 17 | num_id=100 18 | train_set=image_valid$sets 19 | valid_set=image_test$sets 20 | ;; 21 | MARS) 22 | base=mars_$3 23 | num_id=624 24 | train_set=image_valid 25 | 
valid_set=image_test 26 | ;; 27 | *) 28 | echo "No valid dataset" 29 | exit 30 | ;; 31 | esac 32 | 33 | case $3 in 34 | alexnet) 35 | python baseline.py --gpus $gpus --data-dir $data_dir \ 36 | --num-id $num_id --batch-size $bs \ 37 | --train-set $train_set --valid-set $valid_set \ 38 | --lr 1e-4 --num-epoches $epochs --mode $mode \ 39 | --network alexnet --model-load-prefix alexnet --model-load-epoch 1 40 | ;; 41 | inception-bn) 42 | python baseline.py --gpus $gpus --data-dir $data_dir \ 43 | --num-id $num_id --batch-size $bs \ 44 | --train-set $train_set --valid-set $valid_set \ 45 | --lr 1e-2 --num-epoches $epochs --mode $mode --lsoftmax 46 | ;; 47 | *) 48 | echo "No valid basenet" 49 | exit 50 | ;; 51 | esac 52 | -------------------------------------------------------------------------------- /baseline/symbol_alexnet.py: -------------------------------------------------------------------------------- 1 | import find_mxnet 2 | import mxnet as mx 3 | import numpy as np 4 | 5 | 6 | def get_symbol(params=None): 7 | if params is None: 8 | params = dict([(name, mx.sym.Variable(name)) for name in\ 9 | ['conv1_weight', 'conv1_bias', 'conv2_weight', 'conv2_bias', 10 | 'conv3_weight', 'conv3_bias', 'conv4_weight', 'conv4_bias', 11 | 'conv5_weight', 'conv5_bias'] 12 | ]) 13 | 14 | # data 15 | x = mx.symbol.Variable(name="data") 16 | x = mx.sym.Convolution(data=x, kernel=(11, 11), stride=(4, 4), num_filter=96, weight=params['conv1_weight'], bias=params['conv1_bias']) 17 | x = mx.sym.Activation(data=x, act_type='relu') 18 | x = mx.sym.LRN(data=x, alpha=0.0001, beta=0.75, knorm=2, nsize=5) 19 | x = mx.sym.Pooling(data=x, pool_type='max', kernel=(3, 3), stride=(2, 2)) 20 | 21 | x = mx.sym.Convolution(data=x, kernel=(5, 5), pad=(2, 2), num_filter=256, num_group=2, weight=params['conv2_weight'], bias=params['conv2_bias']) 22 | x = mx.sym.Activation(data=x, act_type='relu') 23 | x = mx.sym.LRN(data=x, alpha=0.0001, beta=0.75, knorm=2, nsize=5) 24 | x = mx.sym.Pooling(data=x, kernel=(3, 3), stride=(2, 2), pool_type='max') 25 | 26 | x = mx.sym.Convolution(data=x, kernel=(3, 3), pad=(1, 1), num_filter=384, num_group=1, weight=params['conv3_weight'], bias=params['conv3_bias']) 27 | x = mx.sym.Activation(data=x, act_type='relu') 28 | 29 | x = mx.sym.Convolution(data=x, kernel=(3, 3), pad=(1, 1), num_filter=384, num_group=2, weight=params['conv4_weight'], bias=params['conv4_bias']) 30 | x = mx.sym.Activation(data=x, act_type='relu') 31 | 32 | x = mx.sym.Convolution(data=x, kernel=(3, 3), pad=(1, 1), num_filter=256, num_group=2, weight=params['conv5_weight'], bias=params['conv5_bias']) 33 | x = mx.sym.Activation(data=x, act_type='relu') 34 | x = mx.sym.Pooling(data=x, kernel=(3, 3), stride=(2, 2), pool_type='max') 35 | 36 | return x 37 | -------------------------------------------------------------------------------- /RL/symbol_alexnet.py: -------------------------------------------------------------------------------- 1 | import find_mxnet 2 | import mxnet as mx 3 | import numpy as np 4 | 5 | 6 | def get_symbol(data=None, params=None, fix_gamma=None, global_stats=None): 7 | if params is None: 8 | params = dict([(name, mx.sym.Variable(name)) for name in\ 9 | ['conv1_weight', 'conv1_bias', 'conv2_weight', 'conv2_bias', 10 | 'conv3_weight', 'conv3_bias', 'conv4_weight', 'conv4_bias', 11 | 'conv5_weight', 'conv5_bias'] 12 | ]) 13 | 14 | # data 15 | if data is None: 16 | x = mx.symbol.Variable(name="data") 17 | else: 18 | x = data 19 | x = mx.sym.Convolution(data=x, kernel=(11, 11), stride=(4, 4), num_filter=96, 
weight=params['conv1_weight'], bias=params['conv1_bias']) 20 | x = mx.sym.Activation(data=x, act_type='relu') 21 | x = mx.sym.LRN(data=x, alpha=0.0001, beta=0.75, knorm=2, nsize=5) 22 | x = mx.sym.Pooling(data=x, pool_type='max', kernel=(3, 3), stride=(2, 2)) 23 | 24 | x = mx.sym.Convolution(data=x, kernel=(5, 5), pad=(2, 2), num_filter=256, num_group=2, weight=params['conv2_weight'], bias=params['conv2_bias']) 25 | x = mx.sym.Activation(data=x, act_type='relu') 26 | x = mx.sym.LRN(data=x, alpha=0.0001, beta=0.75, knorm=2, nsize=5) 27 | x = mx.sym.Pooling(data=x, kernel=(3, 3), stride=(2, 2), pool_type='max') 28 | x = mx.symbol.BlockGrad(x, name='block_conv2') 29 | 30 | x = mx.sym.Convolution(data=x, kernel=(3, 3), pad=(1, 1), num_filter=384, num_group=1, weight=params['conv3_weight'], bias=params['conv3_bias']) 31 | x = mx.sym.Activation(data=x, act_type='relu') 32 | #x = mx.symbol.BlockGrad(x, name='block_conv3') 33 | 34 | x = mx.sym.Convolution(data=x, kernel=(3, 3), pad=(1, 1), num_filter=384, num_group=2, weight=params['conv4_weight'], bias=params['conv4_bias']) 35 | x = mx.sym.Activation(data=x, act_type='relu') 36 | #x = mx.symbol.BlockGrad(x, name='block_conv4') 37 | 38 | x = mx.sym.Convolution(data=x, kernel=(3, 3), pad=(1, 1), num_filter=256, num_group=2, weight=params['conv5_weight'], bias=params['conv5_bias']) 39 | x = mx.sym.Activation(data=x, act_type='relu') 40 | x = mx.sym.Pooling(data=x, kernel=(3, 3), stride=(2, 2), pool_type='max') 41 | #x = mx.symbol.BlockGrad(x, name='block_conv5') 42 | 43 | return x 44 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | * [Multi-shot Re-identification](#1) 2 | * [Preparations](#1.1) 3 | * [Usage](#1.2) 4 | 5 |

<h1 id="1">Multi-shot Re-identification Based on Reinforcement Learning</h1>

6 | 7 | --- 8 | 9 | Training and testing code for multi-shot re-identification. Currently, the code is tested on the PRID-2011, iLIDS-VID and MARS datasets. For algorithm details and experimental results, please refer to our paper: [Multi-shot Pedestrian Re-identification via Sequential Decision Making](https://arxiv.org/abs/1712.07257) 10 | 11 |

<h2 id="1.1">Preparations</h2>

12 | 13 | --- 14 | 15 | Before running this code, make the following preparations: 16 | 17 | * Download the [MARS](http://www.liangzheng.com.cn/Project/project_mars.html) 18 | , [iLIDS-VID](http://www.eecs.qmul.ac.uk/~xiatian/downloads_qmul_iLIDS-VID_ReID_dataset.html) and [PRID-2011](https://www.tugraz.at/institute/icg/research/team-bischof/lrs/downloads/PRID11/) datasets. 19 | * Install MXNet following the [instructions](http://mxnet.io/get_started/index.html#setup-and-installation), including its Python interface. Currently the repo is tested on commit e06c55; a build sketch is shown below. 20 | 21 |
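A minimal build-from-source sketch (the repository URL, abbreviated commit and build flags below are illustrative; follow the linked install instructions for your setup, or simply edit the hard-coded path in `find_mxnet.py` to point at your own MXNet checkout):

```shell
# Illustrative only: build MXNet from source and expose its Python package.
git clone --recursive https://github.com/apache/incubator-mxnet mxnet
cd mxnet
git checkout e06c55        # the commit this repo was tested against
make -j"$(nproc)" USE_CUDA=1 USE_CUDNN=1 USE_BLAS=openblas
# Either install the Python bindings ...
cd python && python setup.py install --user
# ... or append <path-to>/mxnet/python to sys.path, as find_mxnet.py does.
```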

<h2 id="1.2">Usage</h2>

22 | 23 | --- 24 | 25 | * Download the datasets and unzip them. 26 | * Prepare the data files. Generate the image list files with `preprocess_ilds_image.py`, 27 | `preprocess_prid_image.py` and `preprocess_mars_image.py` in the `baseline` folder. 28 | * The code is split into two stages. The first stage is an image-based re-id task; 29 | please refer to the script `run.sh` in the `baseline` folder. The code for this stage is based on [this repo](https://github.com/TuSimple/re-identification). The usage is: 30 | ```shell 31 | sh run.sh $gpu $dataset $network $recfolder 32 | ``` 33 | e.g., to train on the MARS dataset on GPU 0 using inception-bn, run: 34 | ```shell 35 | sh run.sh 0 MARS inception-bn /data3/matt/MARS/recs 36 | ``` 37 | * The second stage is a multi-shot re-id task based on reinforcement learning. 38 | Please refer to the script `run.sh` in the `RL` folder. The usage is: 39 | ```shell 40 | sh run.sh $gpu $unsure-penalty $dataset $network $recfolder 41 | ``` 42 | * For evaluation, please use `baseline/baseline_test.py` and `RL/find_eg.py`. In `RL/find_eg.py`, we also show some high-quality example episodes generated by our algorithm. 43 | 44 | 45 | -------------------------------------------------------------------------------- /RL/batch_provider_mars.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | 5 | def get_data(t): 6 | try: 7 | batch = t.next() 8 | except StopIteration: 9 | t.reset() 10 | batch = t.next() 11 | return batch.data[0] 12 | 13 | 14 | class BatchProvider: 15 | def __init__(self, F, is_train, size, sample_ratio=0.5, need_feat=False, start=None, end=None, is_valid=False, agent=None): 16 | self.F = F 17 | self.is_train = is_train 18 | self.size = size 19 | self.N = len(F) 20 | self.sample_ratio = sample_ratio 21 | self.need_feat = need_feat 22 | self.valid = is_valid 23 | self.cnt = 0 24 | self.agent = agent 25 | if start is not None: 26 | self.start, self.end = start, end 27 | else: 28 | self.start, self.end = 0, self.N 29 | self.reset() 30 | 31 | def reset(self): 32 | if self.valid: 33 | self.cnt = 0 34 | self.terminal = [True for _ in xrange(self.size)] 35 | self.A = -1 36 | self.B = -1 37 | self.cA = -1 38 | self.cB = -1 39 | self.tA = -1 40 | self.tB = -1 41 | self.curA = np.zeros((self.size, 3, 224, 112)) 42 | self.curB = np.zeros((self.size, 3, 224, 112)) 43 | 44 | def get_img(self, F, aug=False): 45 | return get_data(F)[0].asnumpy() 46 | 47 | def provide(self, preload=None): 48 | if random.random() < self.sample_ratio: 49 | a = b = (np.random.choice(self.end-self.start, 1) + self.start)[0] 50 | while len(self.F[a]) < 2: 51 | a = b = np.random.choice(self.N, 1)[0] 52 | else: 53 | a, b = (np.random.choice(self.end-self.start, 2, replace=False)+self.start) 54 | self.A, self.B = a, b 55 | if not a == b: 56 | self.cA, self.cB = np.random.choice(len(self.F[a]), 1)[0], np.random.choice(len(self.F[b]), 1)[0] 57 | else: 58 | self.cA, self.cB = np.random.choice(len(self.F[a]), 2, replace=False) 59 | self.tA, self.tB = np.random.choice(len(self.F[a][self.cA]), 1)[0], np.random.choice(len(self.F[b][self.cB]), 1)[0] 60 | print self.A, self.cA, self.tA 61 | for i in xrange(self.size): 62 | self.curA[i] = self.get_img(self.F[self.A][self.cA][self.tA], True)#self.A%self.N==self.B%self.N) 63 | self.curB[i] = self.get_img(self.F[self.B][self.cB][self.tB], True)#self.A%self.N==self.B%self.N) 64 | 65 | cur = [np.array(self.curA), np.array(self.curB)] 66 | return cur, self.A, self.B
-------------------------------------------------------------------------------- /RL/replay_memory.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | from segment_tree import SumSegmentTree, MinSegmentTree 4 | from utils import copyto 5 | 6 | class ReplayMemory: 7 | def __init__(self, replay_size, alpha=0.6): 8 | self.replay_size = replay_size 9 | self.cnt = 0 10 | self._alpha = alpha 11 | it_capacity = 1 12 | while it_capacity < replay_size: 13 | it_capacity *= 2 14 | 15 | self._it_sum = SumSegmentTree(it_capacity) 16 | self._it_min = MinSegmentTree(it_capacity) 17 | self._max_priority = 1.0 18 | self._storage = [] 19 | self._maxsize = replay_size 20 | self._next_idx = 0 21 | 22 | def add(self, data): 23 | #new_data = [] 24 | #for i in data: 25 | # i.wait_to_read() 26 | # new_data.append(copyto(i)) 27 | idx = self._next_idx # take the write position before advancing it, so the priority is attached to the stored sample 28 | if self._next_idx >= len(self._storage): 29 | self._storage.append(data) 30 | else: 31 | self._storage[self._next_idx] = data 32 | self._next_idx = (self._next_idx + 1) % self._maxsize 33 | self._it_sum[idx] = self._max_priority ** self._alpha 34 | self._it_min[idx] = self._max_priority ** self._alpha 35 | 36 | 37 | def _sample_proportional(self, batch_size): 38 | res = [] 39 | for _ in range(batch_size): 40 | mass = random.random() * self._it_sum.sum(0, len(self._storage) - 1) 41 | idx = self._it_sum.find_prefixsum_idx(mass) 42 | res.append(idx) 43 | return res 44 | 45 | def sample(self, batch_size, beta): 46 | assert beta > 0 47 | 48 | idxes = self._sample_proportional(batch_size) 49 | 50 | weights = [] 51 | p_min = self._it_min.min() / self._it_sum.sum() 52 | max_weight = (p_min * len(self._storage)) ** (-beta) 53 | 54 | for idx in idxes: 55 | p_sample = self._it_sum[idx] / self._it_sum.sum() 56 | weight = (p_sample * len(self._storage)) ** (-beta) 57 | weights.append(weight / max_weight) 58 | #print self._it_min.min(), weights 59 | weights = np.array(weights) 60 | weights /= np.sum(weights) 61 | ret = [] 62 | for i in xrange(batch_size): 63 | ret.append(self._storage[idxes[i]]) 64 | return (ret, idxes, weights) 65 | 66 | def update_priorities(self, idxes, priorities): 67 | assert len(idxes) == len(priorities) 68 | for idx, priority in zip(idxes, priorities): 69 | assert priority > 0 70 | assert 0 <= idx < len(self._storage) 71 | self._it_sum[idx] = priority ** self._alpha 72 | self._it_min[idx] = priority ** self._alpha 73 | 74 | self._max_priority = max(self._max_priority, priority) 75 | 76 | 77 | -------------------------------------------------------------------------------- /baseline/verifi_iterator.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import numpy as np 3 | 4 | 5 | class verifi_iterator(mx.io.DataIter): 6 | ''' 7 | Data iterator 8 | Combine two iterators (one totally shuffles, one contains pairs) 9 | ''' 10 | def __init__(self, data_iter1, data_iter2, 11 | use_verifi=False, use_center=False, use_lsoftmax=False, gpus=1): 12 | super(verifi_iterator, self).__init__() 13 | self.data_iter1 = data_iter1 14 | self.data_iter2 = data_iter2 15 | self.batch_size = self.data_iter1.batch_size * 2 16 | self.gpus = gpus 17 | self.use_verifi = use_verifi 18 | self.use_center = use_center 19 | self.use_lsoftmax = use_lsoftmax 20 | print "gpus", self.gpus 21 | 22 | @property 23 | def provide_data(self): 24 | provide_data = self.data_iter1.provide_data[0] 25 | shape = list(provide_data[1]) 26 | shape[0] *= 2 27 | 28 |
return [(provide_data[0], tuple(shape))] 29 | 30 | @property 31 | def provide_label(self): 32 | # provide_label = self.data_iter1.provide_label[0][1] 33 | # Different labels should be used here for actual application 34 | labels = [('softmax_label', (self.batch_size,))] 35 | if self.use_lsoftmax: 36 | labels.append(('lsoftmax_label', (self.batch_size,))) 37 | if self.use_verifi: 38 | labels.append(('verifi_label', (self.batch_size,))) 39 | if self.use_center: 40 | labels.append(('center_label',(self.batch_size,))) 41 | return labels 42 | 43 | def hard_reset(self): 44 | self.data_iter1.hard_reset() 45 | self.data_iter2.hard_reset() 46 | 47 | def reset(self): 48 | self.data_iter1.reset() 49 | self.data_iter2.reset() 50 | 51 | def next(self): 52 | batch1 = self.data_iter1.next() 53 | batch2 = self.data_iter2.next() 54 | 55 | def concat_array(data1, data2, gpus, ndarray=True): 56 | n = data2.shape[0] 57 | k = n / gpus 58 | data_lst = [] 59 | for i in range(0, n, n / gpus): 60 | data_lst.append(data1[i:i + k]) 61 | data_lst.append(data2[i:i + k]) 62 | 63 | # print data_lst[0].shape, data_lst[1].shape 64 | data = mx.nd.concatenate(data_lst) if ndarray\ 65 | else np.concatenate(data_lst) 66 | 67 | return data 68 | 69 | data = concat_array(batch1.data[0], batch2.data[0], self.gpus) 70 | label = concat_array(batch1.label[0], batch2.label[0], self.gpus) 71 | index = concat_array( 72 | batch1.index, -batch2.index, self.gpus, ndarray=False) 73 | 74 | labels = [label] 75 | if self.use_verifi: 76 | labels.append(label) 77 | if self.use_lsoftmax: 78 | labels.append(label) 79 | if self.use_center: 80 | labels.append(label) 81 | # print data.shape 82 | return mx.io.DataBatch(data=[data], 83 | label=labels, 84 | pad=batch1.pad + batch2.pad, 85 | index=index) 86 | -------------------------------------------------------------------------------- /baseline/utils.py: -------------------------------------------------------------------------------- 1 | import find_mxnet 2 | import mxnet as mx 3 | from even_iterator import Even_iterator 4 | 5 | def load_checkpoint(prefix, epoch): 6 | # symbol = sym.load('%s-symbol.json' % prefix) 7 | save_dict = mx.nd.load('%s-%04d.params' % (prefix, epoch)) 8 | arg_params = {} 9 | aux_params = {} 10 | for k, v in save_dict.items(): 11 | tp, name = k.split(':', 1) 12 | if name in ['triplet_match', 'triplet', 'lmnn', 'lsoftmax', 'lsoftmax_weight', 'lsoftmax_label']: 13 | continue 14 | if tp == 'arg': 15 | arg_params[name] = v 16 | if tp == 'aux': 17 | aux_params[name] = v 18 | return (arg_params, aux_params) 19 | 20 | def copyto(x): 21 | return x.copyto(x.context) 22 | 23 | def get_imRecordIter(args, name, input_shape, batch_size, kv=None, shuffle=False, aug=False, even_iter=False): 24 | ''' 25 | get iterator use even_iterator or ImageRecordIter 26 | ''' 27 | if even_iter: 28 | aug_params = {} 29 | aug_params['resize'] = 128 30 | aug_params['rand_crop'] = aug 31 | aug_params['rand_mirror'] = aug 32 | aug_params['input_shape'] = input_shape 33 | aug_params['mean'] = 128.0 34 | 35 | dataiter = Even_iterator( 36 | '%s/%s.lst' % (args.data_dir, name), 37 | batch_size=batch_size, 38 | aug_params=aug_params, 39 | shuffle=shuffle, 40 | data_dir = args.data_dir) 41 | else: 42 | if aug: 43 | dataiter = mx.io.ImageRecordIter( 44 | path_imglist="%s/%s.lst" % (args.data_dir, name), 45 | path_imgrec="%s/%s.rec" % (args.data_dir, name), 46 | # mean_img="models/market_mean.bin", 47 | mean_r=128.0, 48 | mean_g=128.0, 49 | mean_b=128.0, 50 | rand_crop=True, 51 | rand_mirror=True, 52 | 
max_random_contrast=0.1, 53 | max_random_illumination=0.1, 54 | max_aspect_ratio=0.1, 55 | max_shear_ratio=0.2, 56 | random_h=10, 57 | random_s=10, 58 | random_l=10, 59 | #max_random_contrast=0.2, 60 | #max_random_illumination=0.2, 61 | #max_aspect_ratio=0.2, 62 | #max_shear_ratio=0.2, 63 | #random_h=30, 64 | #random_s=30, 65 | #random_l=30, 66 | prefetch_buffer=4, 67 | preprocess_threads=4, 68 | shuffle=shuffle, 69 | label_width=1, 70 | round_batch=True, 71 | data_shape=input_shape, 72 | batch_size=batch_size,) 73 | #num_parts=kv.num_workers, 74 | #part_index=kv.rank) 75 | else: 76 | dataiter = mx.io.ImageRecordIter( 77 | path_imglist="%s/%s.lst" % (args.data_dir, name), 78 | path_imgrec="%s/%s.rec" % (args.data_dir, name), 79 | # mean_img="models/market_mean.bin", 80 | mean_r=128.0, 81 | mean_g=128.0, 82 | mean_b=128.0, 83 | prefetch_buffer=4, 84 | preprocess_threads=4, 85 | shuffle=shuffle, 86 | label_width=1, 87 | round_batch=True, 88 | data_shape=input_shape, 89 | batch_size=batch_size,) 90 | #num_parts=kv.num_workers, 91 | #part_index=kv.rank) 92 | 93 | return dataiter 94 | -------------------------------------------------------------------------------- /baseline/preprocess_mars_image.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import csv 4 | import random 5 | import glob 6 | import numpy as np 7 | 8 | ROOT = '/data3/matt/MARS' 9 | output = '/data3/matt/MARS/recs' 10 | im2rec = '/home/tina/reid/mxnet/bin/im2rec' 11 | 12 | 13 | def load_split(): 14 | train, test = [], [] 15 | cnt = 0 16 | for i in xrange(386): 17 | cam_a = glob.glob('%s/multi_shot/cam_a/person_%04d/*.png' % (ROOT, i)) 18 | cam_b = glob.glob('%s/multi_shot/cam_b/person_%04d/*.png' % (ROOT, i)) 19 | if len(cam_a) * len(cam_b) > 0: 20 | cnt += 1 21 | if cnt > 100: 22 | test.append(i) 23 | else: 24 | train.append(i) 25 | if cnt >= 200: 26 | break 27 | return train, test 28 | 29 | def rnd_pos(N, i): 30 | x = random.randint(0, N - 2) 31 | return x + 1 if x == i else x 32 | 33 | def save_rec(lst, path, name): 34 | lst_file = '%s/%s.lst' % (path, name) 35 | rec_file = '%s/%s.rec' % (path, name) 36 | #print lst_file, rec_file, '%s %s %s %s resize=128 quality=90' % (im2rec, lst_file, ROOT, rec_file) 37 | fo = csv.writer(open(lst_file, "w"), delimiter='\t', lineterminator='\n') 38 | for item in lst: 39 | fo.writerow(item) 40 | print 'echo 123456 | sudo -S %s %s %s %s resize=128 quality=90 &' % (im2rec, lst_file, ROOT, rec_file) 41 | #subprocess.call('%s %s %s %s resize=128 quality=90' % (im2rec, lst_file, ROOT, rec_file)) 42 | 43 | def save_train(f, is_valid=False): 44 | plst, nlst, cnt, N, pool = [], [], 0, len(f), [_ for _ in xrange(len(f))] 45 | for _ in xrange(100000 if not is_valid else 2000): 46 | ts = random.sample(pool, 96) 47 | ns, ps = ts[:64], ts[64:] 48 | for r in xrange(32): 49 | i, x, y = ps[r], ns[r + r], ns[r + r + 1] 50 | p1c = random.randint(0, len(f[i]) - 1) 51 | p2c = rnd_pos(len(f[i]), p1c) 52 | p1 = (cnt, i, f[i][p1c][random.randint(0, len(f[i][p1c]) - 1)]) 53 | p2 = (cnt + 1, i, f[i][p2c][random.randint(0, len(f[i][p2c]) - 1)]) 54 | n1c = random.randint(0, len(f[x]) - 1) 55 | n2c = random.randint(0, len(f[y]) - 1) 56 | n1 = (cnt, x, f[x][n1c][random.randint(0, len(f[x][n1c]) - 1)]) 57 | n2 = (cnt + 1, y, f[y][n2c][random.randint(0, len(f[y][n2c]) - 1)]) 58 | cnt += 2 59 | plst.append(p1) 60 | plst.append(p2) 61 | nlst.append(n1) 62 | nlst.append(n2) 63 | save_rec(plst, output, 'image_' + ('valid' if is_valid else 'train') + 
'_even') 64 | save_rec(nlst, output, 'image_' + ('valid' if is_valid else 'train') + '_rand') 65 | 66 | def gen_train(): 67 | pool = [] 68 | for i in xrange(1500): 69 | images = glob.glob('%s/bbox_train/%04d/*.jpg' % (ROOT, i)) 70 | f = dict() 71 | for k in images: 72 | name = k.split('/')[-1] 73 | ct = name[4:6] 74 | if not ct in f: 75 | f[ct] = [] 76 | f[ct].append(k[len(ROOT):]) 77 | g = [] 78 | for x in f: 79 | if len(f[x]) > 1: 80 | g.append(f[x]) 81 | if len(g) <= 1: 82 | continue 83 | pool.append(g) 84 | 85 | save_train(pool) 86 | save_train(pool, is_valid=True) 87 | 88 | def naive_lst(dataset): 89 | lst_file = open('%s/MARS-evaluation/info/%s_name.txt' % (ROOT, dataset)) 90 | lst, cnt = [], 0 91 | for line in lst_file: 92 | s = line.strip() 93 | lst.append((cnt, 0, '/bbox_%s/%s/%s' % (dataset, s[:4], s))) 94 | cnt += 1 95 | save_rec(lst, output, 'eval_' + dataset) 96 | 97 | if __name__ == '__main__': 98 | #naive_lst('train') 99 | #naive_lst('test') 100 | gen_train() 101 | -------------------------------------------------------------------------------- /RL/batch_provider.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | 4 | class BatchProvider: 5 | def __init__(self, F, lst, is_train, size, sample_ratio=0.5, need_feat=False, start=None, end=None, is_valid=False, agent=None): 6 | self.F = F 7 | self.lst = lst 8 | self.is_train = is_train 9 | self.size = size 10 | self.N = lst.shape[0] / 2 11 | self.sample_ratio = sample_ratio 12 | self.need_feat = need_feat 13 | self.valid = is_valid 14 | self.cnt = 0 15 | self.agent = agent 16 | if start is not None: 17 | self.start, self.end = start, end 18 | else: 19 | self.start, self.end = 0, self.N 20 | self.vid = [] 21 | for i in xrange(self.N + self.N): 22 | for j in xrange(lst[i + 1] - lst[i]): 23 | self.vid.append(i) 24 | self.epoch_rounds = lst[-1] * 2 25 | self.first_imgs = [(i, j) for i in xrange(lst[-1]) for j in xrange(2)] 26 | random.shuffle(self.first_imgs) 27 | self.vid = [] 28 | for i in xrange(self.N + self.N): 29 | for j in xrange(lst[i + 1] - lst[i]): 30 | self.vid.append(i) 31 | self.reset() 32 | self.hit_cnt = np.zeros(self.epoch_rounds / 2) 33 | self.img_rank = [] 34 | for i in xrange(self.N + self.N): 35 | g = [] 36 | for j in xrange(lst[i], lst[i + 1]): 37 | g.append(j) 38 | random.shuffle(g) 39 | self.img_rank.append(g) 40 | 41 | 42 | def reset(self): 43 | if self.valid: 44 | self.cnt = 0 45 | self.terminal = [True for _ in xrange(self.size)] 46 | self.A = -1 47 | self.B = -1 48 | self.curA = np.zeros((self.size, 3, 224, 112)) 49 | self.curB = np.zeros((self.size, 3, 224, 112)) 50 | 51 | 52 | def get_img(self, i, aug=False): 53 | idx = random.randrange(self.lst[i], self.lst[i + 1]) 54 | self.hit_cnt[idx] += 1 55 | return self.F.get_single(idx, aug), idx 56 | 57 | 58 | def provide(self, preload=None): 59 | if preload is None: 60 | if self.valid: 61 | next = self.cnt 62 | self.cnt += 1 63 | self.A = next / self.N 64 | self.B = next % self.N 65 | self.B += self.N 66 | for i in xrange(self.size): 67 | self.curA[i] = self.F.get_single(self.img_rank[self.A][i % len(self.img_rank[self.A])]) 68 | self.curB[i] = self.F.get_single(self.img_rank[self.B][i % len(self.img_rank[self.B])]) 69 | else: 70 | first_img = self.first_imgs[self.cnt % self.epoch_rounds] 71 | self.cnt += 1 72 | a = self.vid[first_img[0]] 73 | if a < self.N: 74 | if first_img[1] == 1: 75 | b = a + self.N 76 | else: 77 | b = self.vid[random.randrange(self.lst[self.N], 
self.lst[self.N+self.N])] 78 | while b == a + self.N: 79 | b = self.vid[random.randrange(self.lst[self.N], self.lst[self.N+self.N])] 80 | else: 81 | if first_img[1] == 1: 82 | b = a - self.N 83 | else: 84 | b = self.vid[random.randrange(self.lst[0], self.lst[self.N])] 85 | while b == a - self.N: 86 | b = self.vid[random.randrange(self.lst[0], self.lst[self.N])] 87 | self.A, self.B = a, b 88 | idx = [] 89 | for i in xrange(self.size): 90 | self.curA[i], ida = self.get_img(self.A, True)#self.A%self.N==self.B%self.N) 91 | self.curB[i], idb = self.get_img(self.B, True)#self.A%self.N==self.B%self.N) 92 | idx.append((ida, idb)) 93 | else: 94 | for i in xrange(self.size): 95 | self.curA[i], self.curB[i] = self.F.get_single(preload[i][0]), self.F.get_single(preload[i][1]) 96 | 97 | cur = [np.array(self.curA), np.array(self.curB)] 98 | if not self.valid: 99 | if preload is None: 100 | return cur, self.A, self.B, idx 101 | else: 102 | return cur 103 | return cur, self.A, self.B -------------------------------------------------------------------------------- /RL/utils.py: -------------------------------------------------------------------------------- 1 | import find_mxnet 2 | import mxnet as mx 3 | from img_lib import ImgLibrary 4 | 5 | 6 | def dist(a, b): 7 | diff = mx.nd.L2Normalization(mx.nd.expand_dims(a, axis=0)) - mx.nd.L2Normalization(mx.nd.expand_dims(b, axis=0)) 8 | return mx.nd.sum(diff * diff).asnumpy()[0] 9 | 10 | 11 | class TimeInvScheduler(mx.lr_scheduler.LRScheduler): 12 | def __init__(self, step, stop_factor_lr=1e-8): 13 | super(TimeInvScheduler, self).__init__() 14 | if step < 1: 15 | raise ValueError("Schedule step must be greater or equal than 1 round") 16 | self.step = step 17 | self.stop_factor_lr = stop_factor_lr 18 | 19 | def __call__(self, num_update): 20 | t = num_update / self.step 21 | lr = self.base_lr * 1.0 / (1.0 + t) 22 | if lr < self.stop_factor_lr: 23 | lr = self.stop_factor_lr 24 | return lr 25 | 26 | 27 | def load_checkpoint(prefix, epoch): 28 | # symbol = sym.load('%s-symbol.json' % prefix) 29 | save_dict = mx.nd.load('%s-%04d.params' % (prefix, epoch)) 30 | arg_params = {} 31 | aux_params = {} 32 | for k, v in save_dict.items(): 33 | tp, name = k.split(':', 1) 34 | #if name in ['triplet_match', 'triplet', 'lmnn', 'lsoftmax', 'lsoftmax_weight', 'lsoftmax_label']: 35 | # continue 36 | if tp == 'arg': 37 | arg_params[name] = v 38 | if tp == 'aux': 39 | aux_params[name] = v 40 | return (arg_params, aux_params) 41 | 42 | 43 | def copyto(x): 44 | return x.copyto(x.context) 45 | 46 | 47 | def get_imRecordIter(args, name, input_shape, batch_size, kv=None, shuffle=False, aug=False, even_iter=False): 48 | ''' 49 | get iterator use ImgLibrary or ImageRecordIter 50 | ''' 51 | if even_iter: 52 | aug_params = {} 53 | aug_params['resize'] = 128 54 | aug_params['rand_crop'] = aug 55 | aug_params['rand_mirror'] = aug 56 | aug_params['input_shape'] = input_shape 57 | aug_params['mean'] = 128.0 58 | 59 | dataiter = ImgLibrary( 60 | '%s/%s.lst' % (args.data_dir, name), 61 | batch_size=batch_size, 62 | aug_params=aug_params, 63 | shuffle=shuffle, 64 | data_dir = args.data_dir) 65 | else: 66 | if aug: 67 | dataiter = mx.io.ImageRecordIter( 68 | path_imglist="%s/%s.lst" % (args.data_dir, name), 69 | path_imgrec="%s/%s.rec" % (args.data_dir, name), 70 | # mean_img="models/market_mean.bin", 71 | mean_r=128.0, 72 | mean_g=128.0, 73 | mean_b=128.0, 74 | rand_crop=True, 75 | rand_mirror=True, 76 | #max_random_contrast=0.1, 77 | #max_random_illumination=0.1, 78 | #max_aspect_ratio=0.1, 79 | 
#max_shear_ratio=0.2, 80 | #random_h=10, 81 | #random_s=10, 82 | #random_l=10, 83 | #max_random_contrast=0.2, 84 | #max_random_illumination=0.2, 85 | #max_aspect_ratio=0.2, 86 | #max_shear_ratio=0.2, 87 | #random_h=30, 88 | #random_s=30, 89 | #random_l=30, 90 | prefetch_buffer=4, 91 | preprocess_threads=4, 92 | shuffle=shuffle, 93 | label_width=1, 94 | round_batch=True, 95 | data_shape=input_shape, 96 | batch_size=batch_size,) 97 | #num_parts=kv.num_workers, 98 | #part_index=kv.rank) 99 | else: 100 | dataiter = mx.io.ImageRecordIter( 101 | path_imglist="%s/%s.lst" % (args.data_dir, name), 102 | path_imgrec="%s/%s.rec" % (args.data_dir, name), 103 | # mean_img="models/market_mean.bin", 104 | mean_r=128.0, 105 | mean_g=128.0, 106 | mean_b=128.0, 107 | prefetch_buffer=4, 108 | preprocess_threads=4, 109 | shuffle=shuffle, 110 | label_width=1, 111 | round_batch=True, 112 | data_shape=input_shape, 113 | batch_size=batch_size,) 114 | #num_parts=kv.num_workers, 115 | #part_index=kv.rank) 116 | 117 | return dataiter 118 | -------------------------------------------------------------------------------- /RL/tb_system.py: -------------------------------------------------------------------------------- 1 | class TensorBoardSystem: 2 | def __init__(self, pre, writer): 3 | self.tb_pool = {} 4 | self.pre = pre 5 | self.init_board() 6 | self.heartbeat = 0 7 | self.writer = writer 8 | self.tp = 0 9 | self.fp = 0 10 | self.tn = 0 11 | self.fn = 0 12 | 13 | def init_board(self): 14 | #pool_names = ['softmax_acc', 'triplet_loss', 'triplet_neg', 'triplet_pos', 'triplet_diff', 'triplet_ratio'] 15 | Q_pool_names = ['neg_neg', 'pos_round', 'neg_ratio', 'neg_round', 'pos_pos', 'pos_ratio', 'pos_acc', 'neg_acc', 'Qvalue_0', 'Qvalue_1', 'Qvalue_2', 'Qvalue_3', 'Qgt_0', 'Qgt_1', 'Qdiff_2', 'Qdiff_3']#, 'epsilon'] 16 | #for i in pool_names: 17 | # add_board(tb_pool, i) 18 | '''for i in xrange(seq_len): 19 | pre = 'Q' + str(i) + '_' 20 | for j in Q_pool_names: 21 | add_board(tb_pool, pre + j)''' 22 | for j in Q_pool_names: 23 | self.add_board(self.pre + '_' + j) 24 | 25 | def add_board(self, name): 26 | self.tb_pool[name] = [0, 0] 27 | 28 | def update_board(self, name, v): 29 | self.tb_pool[name][0] += v 30 | self.tb_pool[name][1] += 1.0 31 | 32 | def get_board(self, name): 33 | if self.tb_pool[name][1] > 0.5: 34 | return (self.tb_pool[name][0] / self.tb_pool[name][1], self.heartbeat) 35 | else: 36 | return (0, 0) 37 | 38 | def put_board(self, Qvalue, action, t, delta, epsilon, rounds, dummy=False): 39 | '''for i in xrange(len(label)): 40 | update_board(tb_pool, pool_names[0], softmax_output[i] == label[i]) 41 | for i in xrange(len(triplet_output)): 42 | t = triplet_output[i].asnumpy() 43 | for j in xrange(args.batch_size): 44 | update_board(tb_pool, pool_names[1 + i], t[j])''' 45 | act = action 46 | pre = self.pre 47 | self.update_board(('%s_Qvalue_%d' % (pre, act)), Qvalue[act]) 48 | self.update_board(('%s_Qgt_%d' % (pre, t)), Qvalue[1 if t else 0]) 49 | if act == 2: 50 | if t == 1: 51 | self.update_board('%s_neg_neg' % (pre), 0.0) 52 | else: 53 | self.update_board('%s_neg_neg' % (pre), 1.0) 54 | self.update_board('%s_neg_ratio' % (pre), 1.0) 55 | elif act == 3: 56 | if t == 1: 57 | self.update_board('%s_pos_pos' % (pre), 1.0) 58 | else: 59 | self.update_board('%s_pos_pos' % (pre), 0.0) 60 | self.update_board('%s_pos_ratio' % (pre), 1.0) 61 | else: 62 | if act == 1: 63 | if t == 1: 64 | self.tp += 1.0 65 | else: 66 | self.fp += 1.0 67 | else: 68 | if t == 1: 69 | self.fn += 1.0 70 | else: 71 | self.tn += 1.0 72 | 
self.update_board('%s_pos_ratio' % (pre), 0.0) 73 | self.update_board('%s_neg_ratio' % (pre), 0.0) 74 | self.update_board('%s_pos_pos' % (pre), t == act) 75 | if t == 1: 76 | self.update_board('%s_pos_acc' % (pre), act == 1) 77 | if not dummy: 78 | self.update_board('%s_pos_round' % (pre), rounds) 79 | else: 80 | self.update_board('%s_neg_acc' % (pre), act == 0) 81 | if not dummy: 82 | self.update_board('%s_neg_round' % (pre), rounds) 83 | #self.update_board('Q_epsilon', epsilon) 84 | 85 | def print_board(self): 86 | for i in self.tb_pool: 87 | v = self.get_board(i) 88 | if v[1] > 0: 89 | self.writer.add_scalar(i, v[0], v[1]) 90 | if (self.tp + self.fp > 0) and (self.tp + self.fn > 0): 91 | precision = 1.0 * self.tp / (self.tp + self.fp) 92 | recall = 1.0 * self.tp / (self.tp + self.fn) 93 | gm = (precision * recall) ** 0.5 94 | acc = 1.0 * (self.tp + self.tn) / (self.tp + self.tn + self.fp + self.fn) 95 | self.writer.add_scalar(self.pre + '_' + 'precision', precision, self.heartbeat) 96 | self.writer.add_scalar(self.pre + '_' + 'recall', recall, self.heartbeat) 97 | self.writer.add_scalar(self.pre + '_' + 'gm', gm, self.heartbeat) 98 | self.writer.add_scalar(self.pre + '_' + 'acc', acc, self.heartbeat) 99 | if precision + recall > 0: 100 | f1 = 2.0 * (precision * recall) / (precision + recall) 101 | self.writer.add_scalar(self.pre + '_' + 'f1', f1, self.heartbeat) 102 | self.heartbeat += 1 103 | -------------------------------------------------------------------------------- /baseline/extract.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, "mxnet/python/") 3 | import find_mxnet 4 | import mxnet as mx 5 | import logging 6 | import numpy as np 7 | import argparse 8 | import time 9 | import random 10 | import loss_layers 11 | import lsoftmax 12 | import loss_drop_layers 13 | import pairwiseDropout 14 | import scipy.io as sio 15 | import h5py 16 | 17 | # extract features for testing set 18 | 19 | def get_imRecordIter(name, input_shape, batch_size, kv, shuffle=False, aug=False): 20 | dataiter = mx.io.ImageRecordIter( 21 | path_imglist="%s/%s.lst" % (args.data_dir, name), 22 | path_imgrec="%s/%s.rec" % (args.data_dir, name), 23 | #mean_img="models/market_mean.bin", 24 | mean_r=128.0, 25 | mean_g=128.0, 26 | mean_b=128.0, 27 | rand_crop=aug, 28 | rand_mirror=aug, 29 | prefetch_buffer=4, 30 | preprocess_threads=3, 31 | shuffle=shuffle, 32 | label_width=1, 33 | data_shape=input_shape, 34 | batch_size=batch_size, 35 | num_parts=kv.num_workers, 36 | part_index=kv.rank) 37 | 38 | return dataiter 39 | 40 | 41 | def extract_feature(model, iterator, sav_name, num, batch_size): 42 | feature = np.zeros((num, args.feature_size)) 43 | now = 0 44 | iterator.reset() 45 | for batch in iterator: 46 | data = batch.data[0] 47 | output = model.predict(data) 48 | real_size = batch_size - batch.pad 49 | output = output[:real_size] 50 | 51 | feature[now:now+real_size] = output 52 | now += real_size 53 | 54 | print feature.shape, now 55 | h5f = h5py.File(sav_name, 'w') 56 | h5f.create_dataset('feat', data=feature) 57 | h5f.close() 58 | #data = {'feat': feature} 59 | #sio.savemat(sav_name, data, do_compression=True) 60 | #np.savetxt(sav_name[:-4]+'.csv', feature) 61 | # with open(sav_name, "w") as f: 62 | # cPickle.dump(feature, f, protocol=cPickle.HIGHEST_PROTOCOL) 63 | 64 | 65 | def parse_args(): 66 | parser = argparse.ArgumentParser( 67 | description='single domain car recog training') 68 | parser.add_argument('--gpus', type=str, default='6', 
69 | help='the gpus will be used, e.g "0,1,2,3"') 70 | parser.add_argument('--data-dir', type=str, 71 | default="/data3/matt/iLIDS-VID/recs", 72 | help='data directory') 73 | parser.add_argument('--batch-size', type=int, default=1024, 74 | help='the batch size') 75 | parser.add_argument('--feature-size', type=int, default=1024, 76 | help='the feature size') 77 | parser.add_argument('--mode', type=str, default='ilds_baseline_b4', 78 | help='model mode') 79 | parser.add_argument('--dataset', type=str, default='image_test', 80 | help='dataset (test/query)') 81 | parser.add_argument('--kv-store', type=str, 82 | default='device', help='the kvstore type') 83 | parser.add_argument('--model-load-epoch', type=int, default=1, 84 | help='load the model on an epoch using the model-load-prefix') 85 | parser.add_argument('--model-load-prefix', type=str, default="test", 86 | help='load model prefix') 87 | return parser.parse_args() 88 | 89 | 90 | def load_checkpoint(prefix, epoch): 91 | # ssymbol = sym.load('%s-symbol.json' % prefix) 92 | save_dict = mx.nd.load('%s-%04d.params' % (prefix, epoch)) 93 | arg_params = {} 94 | aux_params = {} 95 | for k, v in save_dict.items(): 96 | tp, name = k.split(':', 1) 97 | if tp == 'arg': 98 | arg_params[name] = v 99 | if tp == 'aux': 100 | aux_params[name] = v 101 | return (arg_params, aux_params) 102 | 103 | 104 | args = parse_args() 105 | 106 | print args 107 | batch_size = args.batch_size 108 | devices = [mx.gpu(int(i)) for i in args.gpus.split(',')] 109 | 110 | symbol, arg_params, aux_params = mx.model.load_checkpoint( 111 | 'models/%s' % args.mode, args.model_load_epoch) 112 | 113 | internals = symbol.get_internals() 114 | symbol = internals["flatten_output"] 115 | l2 = mx.symbol.L2Normalization(data=symbol, name='l2_norm') 116 | kv = mx.kvstore.create(args.kv_store) 117 | dataiter = get_imRecordIter( 118 | '%s' % args.dataset, (3, 224, 112), batch_size, 119 | kv, shuffle=False, aug=False) 120 | 121 | model = mx.model.FeedForward( 122 | symbol=l2, ctx=devices, arg_params=arg_params, 123 | aux_params=aux_params, allow_extra_params=True) 124 | 125 | num = len(file('%s/%s.lst' % (args.data_dir, args.dataset)).read().splitlines()) 126 | extract_feature(model, dataiter, 'features/%s-%s.mat' % (args.dataset, args.mode), num, batch_size) 127 | print ('done') 128 | -------------------------------------------------------------------------------- /RL/base_module.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import warnings 3 | import find_mxnet 4 | import mxnet as mx 5 | import numpy as np 6 | from mxnet.module import Module 7 | from mxnet import context as ctx 8 | from mxnet.initializer import Uniform 9 | from mxnet import ndarray as nd 10 | 11 | COUNT_MAX = 1 12 | USE_AVERAGE = False 13 | 14 | class BaseModule(Module): 15 | def __init__(self, symbol, data_names=('data',), label_names=('softmax_label',), 16 | logger=logging, context=ctx.cpu(), work_load_list=None, 17 | fixed_param_names=None, state_names=None): 18 | # count how many times gradients be added 19 | self.add_counter = 0 20 | self.count_max = COUNT_MAX 21 | super(BaseModule, self).__init__(symbol=symbol, data_names=data_names, 22 | label_names=label_names, logger=logger, context=context, 23 | fixed_param_names=fixed_param_names) 24 | 25 | def clear_gradients(self): 26 | """clear gradient 27 | """ 28 | self.add_counter = 0 29 | for grads in self._exec_group.grad_arrays: 30 | for grad in grads: 31 | grad -= grad 32 | 33 | def aver_gradients(self, n): 
34 | ''' get average gradients 35 | ''' 36 | for grads in self._exec_group.grad_arrays: 37 | for grad in grads: 38 | grad /= float(n) 39 | 40 | def add_gradients_from_module(self, from_module): 41 | """add gradients 42 | """ 43 | self.add_counter += 1 44 | gradfrom = [[grad.copyto(grad.context) for grad in grads] for grads in 45 | from_module._exec_group.grad_arrays] 46 | for gradsto, gradsfrom in zip(self._exec_group.grad_arrays, 47 | gradfrom): 48 | for gradto, gradfrom in zip(gradsto, gradsfrom): 49 | gradto += gradfrom 50 | 51 | if self.add_counter == self.count_max: 52 | if USE_AVERAGE: 53 | self.aver_gradients(self.add_counter) 54 | self.update() 55 | self.clear_gradients() 56 | self.add_counter = 0 57 | 58 | def copy_from_module(self, from_module): 59 | """copy from another module 60 | """ 61 | arg_params, aux_params = from_module.get_params() 62 | self.init_params(initializer=None, arg_params=arg_params, 63 | aux_params=aux_params, force_init=True) 64 | 65 | def copy_param_from_module(self, from_module): 66 | arg_params, _ = from_module.get_params() 67 | _, aux_params = self.get_params() 68 | self.init_params(initializer=None, arg_params=arg_params, 69 | aux_params=aux_params, force_init=True) 70 | 71 | def clip_gradients(self, threshold): 72 | """clip gradients 73 | """ 74 | for grads in self._exec_group.grad_arrays: 75 | for grad in grads: 76 | grad -= grad - \ 77 | mx.nd.clip(grad, -1.0 * threshold, 1.0 * threshold).copy() 78 | 79 | 80 | def norm_clipping(self, threshold=1.0): 81 | """Clip the norm according to the threshold. 82 | All the gradients are concatenated to a single vector and the overall norm is calculated. 83 | Follows `[ICML2013] On the difficulty of training recurrent neural networks` 84 | Parameters 85 | ---------- 86 | threshold : float, optional 87 | Returns 88 | ------- 89 | norm_val : float 90 | The norm value. It could be used to measure whether the gradients are stable. 
91 | """ 92 | assert self.binded and self.params_initialized 93 | norm_val = self.get_global_norm_val() 94 | if norm_val > threshold: 95 | ratio = threshold / float(norm_val) 96 | for grads in self._exec_group.grad_arrays: 97 | for grad in grads: 98 | grad[:] *= ratio 99 | return norm_val 100 | 101 | def get_global_norm_val(self): 102 | """Get the overall gradient norm ||W||_2 103 | Parameters 104 | ---------- 105 | net : mx.mod.Module 106 | Returns 107 | ------- 108 | norm_val : float 109 | """ 110 | assert self.binded and self.params_initialized 111 | #TODO The code in the following will cause the estimated norm to be different for multiple gpus 112 | norm_val = 0.0 113 | for i in range(len(self._exec_group.grad_arrays[0])): 114 | norm_val += np.sqrt( 115 | sum([nd.norm(grads[i]).asnumpy()[0] ** 2 116 | for grads in self._exec_group.grad_arrays])) 117 | norm_val /= float(len(self._exec_group.grad_arrays[0])) 118 | return norm_val 119 | -------------------------------------------------------------------------------- /baseline/preprocess_ilds_image.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import csv 4 | import random 5 | import glob 6 | import numpy as np 7 | 8 | ROOT = '/data3/matt/iLIDS-VID' 9 | output = '/data3/matt/iLIDS-VID/recs' 10 | im2rec = '/home/zhangjianfu/reid/mxnet/bin/im2rec' 11 | 12 | sets = 0 13 | 14 | def load_split(): 15 | train, test, pool = [], [], [] 16 | images, cnt = glob.glob('%s/i-LIDS-VID/images/cam1/person*/*.png' % (ROOT)), 0 17 | for i in images: 18 | t = int(i.split('/')[-2][-3:]) 19 | cnt += 1 20 | pool.append(t) 21 | train = random.sample(pool, 150) 22 | for i in pool: 23 | if i not in train: 24 | test.append(i) 25 | print train, test 26 | print len(train), len(test) 27 | return train, test 28 | 29 | def rnd_pos(N, i): 30 | x = random.randint(0, N - 2) 31 | return x + 1 if x == i else x 32 | 33 | def save_rec(lst, path, name): 34 | lst_file = '%s/%s.lst' % (path, name) 35 | rec_file = '%s/%s.rec' % (path, name) 36 | #print lst_file, rec_file, '%s %s %s %s resize=128 quality=90' % (im2rec, lst_file, ROOT, rec_file) 37 | f = open(lst_file, "w") 38 | fo = csv.writer(f, delimiter='\t', lineterminator='\n') 39 | for item in lst: 40 | fo.writerow(item) 41 | f.close() 42 | os.system('%s %s %s %s resize=128 quality=90' % (im2rec, lst_file, ROOT, rec_file)) 43 | 44 | def save_train(f, is_valid=False): 45 | plst, nlst, cnt, N, pool = [], [], 0, len(f[0]), [_ for _ in xrange(len(f[0]))] 46 | for _ in xrange(10000 if not is_valid else 200): 47 | ts = random.sample(pool, 96) 48 | ns, ps = ts[:64], ts[64:] 49 | for r in xrange(32): 50 | i, x, y = ps[r], ns[r + r], ns[r + r + 1] 51 | p1 = (cnt, i, f[0][i][random.randint(0, len(f[0][i]) - 1)]) 52 | p2 = (cnt + 1, i, f[1][i][random.randint(0, len(f[1][i]) - 1)]) 53 | n1 = (cnt, x, f[1][x][random.randint(0, len(f[1][x]) - 1)]) 54 | n2 = (cnt + 1, y, f[0][y][random.randint(0, len(f[0][y]) - 1)]) 55 | cnt += 2 56 | plst.append(p1) 57 | plst.append(p2) 58 | nlst.append(n1) 59 | nlst.append(n2) 60 | save_rec(plst, output, 'image_' + ('valid' if is_valid else 'train') + '_even'+ str(sets)) 61 | save_rec(nlst, output, 'image_' + ('valid' if is_valid else 'train') + '_rand'+ str(sets)) 62 | 63 | def save_test(f): 64 | lst, cnt_lst, cnt = [], [], 0 65 | '''for i in xrange(len(f[0])): 66 | lst.append((i * 2, 0, f[0][i][0])) 67 | lst.append((i * 2 + 1, 0, f[1][i][0]))''' 68 | for i in xrange(len(f[0])): 69 | cnt_lst.append(cnt) 70 | for j in f[0][i]: 71 
| lst.append((cnt, 0, j)) 72 | cnt += 1 73 | for i in xrange(len(f[1])): 74 | cnt_lst.append(cnt) 75 | for j in f[1][i]: 76 | lst.append((cnt, 1, j)) 77 | cnt += 1 78 | cnt_lst.append(cnt) 79 | np.savetxt(output + '/image_test' + str(sets) + '.txt', np.array(cnt_lst), fmt='%d') 80 | save_rec(lst, output, 'image_test'+ str(sets)) 81 | 82 | def save_valid(f): 83 | lst, cnt_lst, cnt = [], [], 0 84 | '''for i in xrange(len(f[0])): 85 | lst.append((i * 2, 0, f[0][i][0])) 86 | lst.append((i * 2 + 1, 0, f[1][i][0]))''' 87 | for i in xrange(len(f[0])): 88 | cnt_lst.append(cnt) 89 | for j in f[0][i]: 90 | lst.append((cnt, 0, j)) 91 | cnt += 1 92 | for i in xrange(len(f[1])): 93 | cnt_lst.append(cnt) 94 | for j in f[1][i]: 95 | lst.append((cnt, 1, j)) 96 | cnt += 1 97 | cnt_lst.append(cnt) 98 | np.savetxt(output + '/image_valid' + str(sets) + '.txt', np.array(cnt_lst), fmt='%d') 99 | save_rec(lst, output, 'image_valid'+ str(sets)) 100 | 101 | 102 | def gen(train_lst, test_lst, ifshuffle): 103 | if ifshuffle: 104 | random.shuffle(train_lst) 105 | random.shuffle(test_lst) 106 | train, valid, test = [[], []], [[], []], [[], []] 107 | for i in xrange(3): 108 | lst = train_lst if i <= 1 else test_lst 109 | pool = train if i == 0 else (valid if i == 1 else test) 110 | for k in lst: 111 | for j in xrange(2): 112 | sets = 'images' if i == 1 else 'sequences' 113 | images = glob.glob('%s/i-LIDS-VID/%s/cam%d/person%03d/*.png' % (ROOT, sets, j + 1, k)) 114 | #print k, j, images 115 | assert len(images) >= 1 116 | g = [_[len(ROOT):] for _ in images] 117 | pool[j].append(g) # fix prefix 118 | 119 | save_train(train) 120 | save_train(valid, is_valid=True) 121 | save_test(test) 122 | save_valid(train) 123 | 124 | if __name__ == '__main__': 125 | for i in xrange(10): 126 | print 'sets', sets 127 | train, test = load_split() 128 | gen(train, test, ifshuffle=True) 129 | sets += 1 130 | 131 | -------------------------------------------------------------------------------- /baseline/preprocess_prid_image.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | import csv 4 | import random 5 | import glob 6 | import numpy as np 7 | 8 | ROOT = '/data3/matt/prid_2011' 9 | output = '/data3/matt/prid_2011/recs' 10 | im2rec = '/home/tina/reid/mxnet/bin/im2rec' 11 | sets = 0 12 | 13 | def load_split(): 14 | train, test, pool = [], [], [] 15 | cnt = 0 16 | for i in xrange(386): 17 | cam_a = glob.glob('%s/multi_shot/cam_a/person_%04d/*.png' % (ROOT, i)) 18 | cam_b = glob.glob('%s/multi_shot/cam_b/person_%04d/*.png' % (ROOT, i)) 19 | if len(cam_a) * len(cam_b) > 0: 20 | cnt += 1 21 | pool.append(i) 22 | if cnt >= 200: 23 | break 24 | train = random.sample(pool, 100) 25 | for i in pool: 26 | if i not in train: 27 | test.append(i) 28 | print train, test 29 | print len(train), len(test) 30 | return train, test 31 | 32 | def rnd_pos(N, i): 33 | x = random.randrange(0, N - 1) 34 | return x + 1 if x == i else x 35 | 36 | def save_rec(lst, path, name): 37 | lst_file = '%s/%s.lst' % (path, name) 38 | rec_file = '%s/%s.rec' % (path, name) 39 | #print lst_file, rec_file, '%s %s %s %s resize=128 quality=90' % (im2rec, lst_file, ROOT, rec_file) 40 | f = open(lst_file, "w") 41 | fo = csv.writer(f, delimiter='\t', lineterminator='\n') 42 | for item in lst: 43 | fo.writerow(item) 44 | f.close() 45 | os.system('%s %s %s %s resize=128 quality=90' % (im2rec, lst_file, ROOT, rec_file)) 46 | 47 | def save_train(f, is_valid=False): 48 | plst, nlst, cnt, N, pool = [], [], 0, 
len(f[0]), [_ for _ in xrange(len(f[0]))] 49 | for _ in xrange(10000 if not is_valid else 200): 50 | ts = random.sample(pool, 96) 51 | ns, ps = ts[:64], ts[64:] 52 | for r in xrange(32): 53 | i, x, y = ps[r], ns[r + r], ns[r + r + 1] 54 | p1 = (cnt, i, f[0][i][random.randrange(len(f[0][i]))]) 55 | p2 = (cnt + 1, i, f[1][i][random.randrange(len(f[1][i]))]) 56 | n1 = (cnt, x, f[1][x][random.randrange(len(f[1][x]))]) 57 | n2 = (cnt + 1, y, f[0][y][random.randrange(len(f[0][y]))]) 58 | cnt += 2 59 | plst.append(p1) 60 | plst.append(p2) 61 | nlst.append(n1) 62 | nlst.append(n2) 63 | save_rec(plst, output, 'image_' + ('valid' if is_valid else 'train') + '_even'+ str(sets)) 64 | save_rec(nlst, output, 'image_' + ('valid' if is_valid else 'train') + '_rand'+ str(sets)) 65 | 66 | def save_test(f): 67 | lst, cnt_lst, cnt = [], [], 0 68 | '''for i in xrange(len(f[0])): 69 | lst.append((i * 2, 0, f[0][i][0])) 70 | lst.append((i * 2 + 1, 0, f[1][i][0]))''' 71 | for i in xrange(len(f[0])): 72 | cnt_lst.append(cnt) 73 | for j in f[0][i]: 74 | lst.append((cnt, 0, j)) 75 | cnt += 1 76 | for i in xrange(len(f[1])): 77 | cnt_lst.append(cnt) 78 | for j in f[1][i]: 79 | lst.append((cnt, 1, j)) 80 | cnt += 1 81 | cnt_lst.append(cnt) 82 | np.savetxt(output + '/image_test' + str(sets) + '.txt', np.array(cnt_lst), fmt='%d') 83 | save_rec(lst, output, 'image_test'+ str(sets)) 84 | 85 | def save_valid(f): 86 | lst, cnt_lst, cnt = [], [], 0 87 | '''for i in xrange(len(f[0])): 88 | lst.append((i * 2, 0, f[0][i][0])) 89 | lst.append((i * 2 + 1, 0, f[1][i][0]))''' 90 | for i in xrange(len(f[0])): 91 | cnt_lst.append(cnt) 92 | for j in f[0][i]: 93 | lst.append((cnt, 0, j)) 94 | cnt += 1 95 | for i in xrange(len(f[1])): 96 | cnt_lst.append(cnt) 97 | for j in f[1][i]: 98 | lst.append((cnt, 1, j)) 99 | cnt += 1 100 | cnt_lst.append(cnt) 101 | np.savetxt(output + '/image_valid' + str(sets) + '.txt', np.array(cnt_lst), fmt='%d') 102 | save_rec(lst, output, 'image_valid'+ str(sets)) 103 | 104 | def gen(train_lst, test_lst, ifshuffle): 105 | if ifshuffle: 106 | random.shuffle(train_lst) 107 | random.shuffle(test_lst) 108 | train, valid, test = [[], []], [[], []], [[], []] 109 | for i in xrange(3): 110 | lst = train_lst if i <= 1 else test_lst 111 | pool = train if i == 0 else (valid if i == 1 else test) 112 | for k in lst: 113 | for j in xrange(2): 114 | sets = 'multi_shot' 115 | images = glob.glob('%s/%s/cam_%s/person_%04d/*.png' % (ROOT, sets, 'a' if j == 0 else 'b', k)) 116 | #print k, j, images 117 | assert len(images) >= 1 118 | g = [_[len(ROOT):] for _ in images] 119 | pool[j].append(g) # fix prefix 120 | 121 | save_train(train) 122 | save_train(valid, is_valid=True) 123 | save_test(test) 124 | save_valid(train) 125 | 126 | if __name__ == '__main__': 127 | for i in xrange(10): 128 | print 'sets', sets 129 | train, test = load_split() 130 | gen(train, test, ifshuffle=True) 131 | sets += 1 132 | -------------------------------------------------------------------------------- /RL/agent.py: -------------------------------------------------------------------------------- 1 | import sys 2 | #sys.path.insert(0, "mxnet/python/") 3 | import find_mxnet 4 | import mxnet as mx 5 | from mxnet.optimizer import SGD, Adam, RMSProp 6 | 7 | import numpy as np 8 | 9 | from symbols import sym_base_net, sym_DQN 10 | from utils import load_checkpoint, TimeInvScheduler, dist, copyto 11 | from base_module import BaseModule 12 | import os 13 | 14 | def create_moduleQ(data1, data2, ctx, seq_len, num_sim, num_hidden, num_acts, min_states, min_imgs, 
fusion=False, bn=False, is_train=False, nh=False, is_e2e=False): 15 | os.environ['MXNET_EXEC_INPLACE_GRAD_SUM_CAP'] = str(100) 16 | net = sym_DQN(data1, data2, num_sim, num_hidden, is_train=is_train, num_acts=num_acts, min_states=min_states, min_imgs=min_imgs, fusion=fusion, bn=bn, global_stats=False, no_his=False) 17 | mod = BaseModule(symbol=net, data_names=('data1', 'data2'), label_names=None, 18 | fixed_param_names=[] if is_e2e else ['data1', 'data2'], context=ctx) 19 | mod.bind(data_shapes=[('data1', (seq_len, 3, 224, 112)), 20 | ('data2', (seq_len, 3, 224, 112))], 21 | for_training=is_train, inputs_need_grad=False) 22 | return mod 23 | 24 | 25 | def get_optimizer(args): 26 | assert args.optimizer in ['sgd', 'adam', 'rms'] 27 | 28 | if args.optimizer == 'sgd': 29 | stepPerEpoch = args.num_examples 30 | lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(step=[stepPerEpoch * int(x) for x in args.lr_step.split(',')], factor=0.1) 31 | #lr_scheduler = TimeInvScheduler(step=args.tisr) # Time inverse scheduler 32 | return SGD(learning_rate=args.lr, momentum=0.9, 33 | wd=0.0001, clip_gradient=10, lr_scheduler=lr_scheduler, 34 | rescale_grad=1.0) 35 | elif args.optimizer == 'rms': 36 | return RMSProp(learning_rate=args.lr, wd=0.0001) 37 | else: 38 | return Adam(learning_rate=args.lr, wd=0.0001, clip_gradient=10) 39 | 40 | 41 | def get_Qvalue(Q, data, is_train=False): 42 | data_batch = mx.io.DataBatch([data[0], data[1]], []) 43 | Q.forward(data_batch, is_train=is_train) 44 | return Q.get_outputs()[0].asnumpy() 45 | 46 | 47 | def wash(data, ctx): 48 | ret = [] 49 | if isinstance(data[0], list): 50 | for i in xrange(len(data[0])): 51 | t = [] 52 | for j in xrange(len(data)): 53 | t.append(np.expand_dims(data[j][i], axis=0) if data[j][i].shape[0] > 1 or len(data[j][i].shape) == 1 else data[j][i]) 54 | t = np.concatenate(t) 55 | ret.append(mx.nd.array(t, ctx=ctx)) 56 | else: 57 | for i in xrange(len(data)): 58 | ret.append(mx.nd.array(data[i], ctx=ctx)) 59 | return ret 60 | 61 | 62 | class Agent: 63 | def __init__(self, args, devices): 64 | self.e2e = args.e2e 65 | self.his = args.history 66 | arg_params, aux_params = load_checkpoint('../baseline/models/%s' % args.model_load_prefix, args.model_load_epoch) 67 | data1, data2 = sym_base_net(args.network, is_train=args.e2e, global_stats=True) 68 | init = mx.initializer.Xavier(rnd_type='gaussian', factor_type='in', magnitude=2) 69 | opt = get_optimizer(args) 70 | self.Q = create_moduleQ(data1, data2, devices, args.sample_size, args.num_sim, args.num_hidden, args.num_acts, args.min_states, args.min_imgs, fusion=args.fusion, is_train=True, nh=not args.history, is_e2e=args.e2e, bn=args.q_bn) 71 | self.Q.init_params(initializer=init, 72 | arg_params=arg_params, 73 | aux_params=aux_params, 74 | allow_missing=True, 75 | force_init=True) 76 | self.Q.init_optimizer(optimizer=opt) 77 | self.target_cnt = 1 78 | self.devices = devices 79 | self.prefix = 'models/%s' % args.mode 80 | self.batch_size = args.batch_size 81 | self.update_cnt = 0 82 | self.Q.clear_gradients() 83 | self.gradQ = [[grad.copyto(grad.context) for grad in grads] for grads in self.Q._exec_group.grad_arrays] 84 | 85 | def wash_data(self, data): 86 | return wash(data, self.devices) 87 | 88 | def get_Qvalue(self, data, is_train=False): 89 | return get_Qvalue(self.Q, data, is_train=is_train) 90 | 91 | def update(self, grad): 92 | self.Q.backward(grad) 93 | for gradsr, gradsf in zip(self.Q._exec_group.grad_arrays, self.gradQ): 94 | for gradr, gradf in zip(gradsr, gradsf): 95 | gradf += gradr 96 | 
self.Q.clear_gradients() 97 | self.update_cnt += 1 98 | if self.update_cnt % self.batch_size == 0: 99 | print 'update', self.update_cnt 100 | for gradsr, gradsf in zip(self.Q._exec_group.grad_arrays, self.gradQ): 101 | for gradr, gradf in zip(gradsr, gradsf): 102 | gradr[:] = gradf[:] / self.batch_size 103 | self.Q.update() 104 | for grads in self.gradQ: 105 | for grad in grads: 106 | grad[:] = 0 107 | 108 | def save(self, e): 109 | self.Q.save_params('%s-%04d.params'%(self.prefix, e)) 110 | -------------------------------------------------------------------------------- /baseline/symbol_inception-bn.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Inception + BN, suitable for images with around 224 x 224 4 | 5 | Reference: 6 | 7 | Sergey Ioffe and Christian Szegedy. Batch normalization: Accelerating deep 8 | network training by reducing internal covariate shift. arXiv preprint 9 | arXiv:1502.03167, 2015. 10 | 11 | """ 12 | 13 | import find_mxnet 14 | import mxnet as mx 15 | 16 | eps = 1e-10 + 1e-5 17 | bn_mom = 0.9 18 | fix_gamma = False 19 | 20 | 21 | def ConvFactory(data, num_filter, kernel, stride=(1,1), pad=(0, 0), name=None, suffix=''): 22 | conv = mx.symbol.Convolution(data=data, num_filter=num_filter, kernel=kernel, stride=stride, pad=pad, name='conv_%s%s' %(name, suffix)) 23 | bn = mx.symbol.BatchNorm(data=conv, fix_gamma=fix_gamma, eps=eps, momentum=bn_mom, name='bn_%s%s' %(name, suffix)) 24 | act = mx.symbol.Activation(data=bn, act_type='relu', name='relu_%s%s' %(name, suffix)) 25 | return act 26 | 27 | def InceptionFactoryA(data, num_1x1, num_3x3red, num_3x3, num_d3x3red, num_d3x3, pool, proj, name): 28 | # 1x1 29 | c1x1 = ConvFactory(data=data, num_filter=num_1x1, kernel=(1, 1), name=('%s_1x1' % name)) 30 | # 3x3 reduce + 3x3 31 | c3x3r = ConvFactory(data=data, num_filter=num_3x3red, kernel=(1, 1), name=('%s_3x3' % name), suffix='_reduce') 32 | c3x3 = ConvFactory(data=c3x3r, num_filter=num_3x3, kernel=(3, 3), pad=(1, 1), name=('%s_3x3' % name)) 33 | # double 3x3 reduce + double 3x3 34 | cd3x3r = ConvFactory(data=data, num_filter=num_d3x3red, kernel=(1, 1), name=('%s_double_3x3' % name), suffix='_reduce') 35 | cd3x3 = ConvFactory(data=cd3x3r, num_filter=num_d3x3, kernel=(3, 3), pad=(1, 1), name=('%s_double_3x3_0' % name)) 36 | cd3x3 = ConvFactory(data=cd3x3, num_filter=num_d3x3, kernel=(3, 3), pad=(1, 1), name=('%s_double_3x3_1' % name)) 37 | # pool + proj 38 | pooling = mx.symbol.Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool, name=('%s_pool_%s_pool' % (pool, name))) 39 | cproj = ConvFactory(data=pooling, num_filter=proj, kernel=(1, 1), name=('%s_proj' % name)) 40 | # concat 41 | concat = mx.symbol.Concat(*[c1x1, c3x3, cd3x3, cproj], name='ch_concat_%s_chconcat' % name) 42 | return concat 43 | 44 | def InceptionFactoryB(data, num_3x3red, num_3x3, num_d3x3red, num_d3x3, name): 45 | # 3x3 reduce + 3x3 46 | c3x3r = ConvFactory(data=data, num_filter=num_3x3red, kernel=(1, 1), name=('%s_3x3' % name), suffix='_reduce') 47 | c3x3 = ConvFactory(data=c3x3r, num_filter=num_3x3, kernel=(3, 3), pad=(1, 1), stride=(2, 2), name=('%s_3x3' % name)) 48 | # double 3x3 reduce + double 3x3 49 | cd3x3r = ConvFactory(data=data, num_filter=num_d3x3red, kernel=(1, 1), name=('%s_double_3x3' % name), suffix='_reduce') 50 | cd3x3 = ConvFactory(data=cd3x3r, num_filter=num_d3x3, kernel=(3, 3), pad=(1, 1), stride=(1, 1), name=('%s_double_3x3_0' % name)) 51 | cd3x3 = ConvFactory(data=cd3x3, num_filter=num_d3x3, kernel=(3, 3), 
pad=(1, 1), stride=(2, 2), name=('%s_double_3x3_1' % name)) 52 | # pool + proj 53 | pooling = mx.symbol.Pooling(data=data, kernel=(3, 3), stride=(2, 2), pad=(1, 1), pool_type="max", name=('max_pool_%s_pool' % name)) 54 | # concat 55 | concat = mx.symbol.Concat(*[c3x3, cd3x3, pooling], name='ch_concat_%s_chconcat' % name) 56 | return concat 57 | 58 | def get_symbol(num_classes=1000): 59 | # data 60 | data = mx.symbol.Variable(name="data") 61 | # stage 1 62 | conv1 = ConvFactory(data=data, num_filter=64, kernel=(7, 7), stride=(2, 2), pad=(3, 3), name='1') 63 | pool1 = mx.symbol.Pooling(data=conv1, kernel=(3, 3), stride=(2, 2), name='pool_1', pool_type='max') 64 | # stage 2 65 | conv2red = ConvFactory(data=pool1, num_filter=64, kernel=(1, 1), stride=(1, 1), name='2_red') 66 | conv2 = ConvFactory(data=conv2red, num_filter=192, kernel=(3, 3), stride=(1, 1), pad=(1, 1), name='2') 67 | pool2 = mx.symbol.Pooling(data=conv2, kernel=(3, 3), stride=(2, 2), name='pool_2', pool_type='max') 68 | # stage 2 69 | in3a = InceptionFactoryA(pool2, 64, 64, 64, 64, 96, "avg", 32, '3a') 70 | in3b = InceptionFactoryA(in3a, 64, 64, 96, 64, 96, "avg", 64, '3b') 71 | in3c = InceptionFactoryB(in3b, 128, 160, 64, 96, '3c') 72 | # stage 3 73 | in4a = InceptionFactoryA(in3c, 224, 64, 96, 96, 128, "avg", 128, '4a') 74 | in4b = InceptionFactoryA(in4a, 192, 96, 128, 96, 128, "avg", 128, '4b') 75 | in4c = InceptionFactoryA(in4b, 160, 128, 160, 128, 160, "avg", 128, '4c') 76 | in4d = InceptionFactoryA(in4c, 96, 128, 192, 160, 192, "avg", 128, '4d') 77 | in4e = InceptionFactoryB(in4d, 128, 192, 192, 256, '4e') 78 | # stage 4 79 | in5a = InceptionFactoryA(in4e, 352, 192, 320, 160, 224, "avg", 128, '5a') 80 | in5b = InceptionFactoryA(in5a, 352, 192, 320, 192, 224, "max", 128, '5b') 81 | # global avg pooling 82 | #avg = mx.symbol.Pooling(data=in5b, kernel=(7, 7), stride=(1, 1), name="global_pool", pool_type='avg') 83 | # linear classifier 84 | #flatten = mx.symbol.Flatten(data=avg, name='flatten') 85 | #fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=num_classes, name='fc1') 86 | #softmax = mx.symbol.SoftmaxOutput(data=fc1, name='softmax') 87 | return in5b 88 | 89 | 90 | if __name__ == '__main__': 91 | sym = get_symbol() 92 | mx.viz.print_summary(sym, {'data': (1, 3, 128, 64)}) 93 | -------------------------------------------------------------------------------- /RL/segment_tree.py: -------------------------------------------------------------------------------- 1 | import operator 2 | 3 | 4 | class SegmentTree(object): 5 | def __init__(self, capacity, operation, neutral_element): 6 | """Build a Segment Tree data structure. 7 | https://en.wikipedia.org/wiki/Segment_tree 8 | Can be used as regular array, but with two 9 | important differences: 10 | a) setting item's value is slightly slower. 11 | It is O(lg capacity) instead of O(1). 12 | b) user has access to an efficient `reduce` 13 | operation which reduces `operation` over 14 | a contiguous subsequence of items in the 15 | array. 16 | Paramters 17 | --------- 18 | capacity: int 19 | Total size of the array - must be a power of two. 20 | operation: lambda obj, obj -> obj 21 | and operation for combining elements (eg. sum, max) 22 | must for a mathematical group together with the set of 23 | possible values for array elements. 24 | neutral_element: obj 25 | neutral element for the operation above. eg. float('-inf') 26 | for max and 0 for sum. 27 | """ 28 | assert capacity > 0 and capacity & (capacity - 1) == 0, "capacity must be positive and a power of 2." 
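# Tree layout: self._value has 2*capacity slots; leaves occupy indices
# [capacity, 2*capacity) and internal node i stores
# operation(self._value[2*i], self._value[2*i+1]), so the root self._value[1]
# holds the reduction over the whole array.
# Minimal usage sketch (illustrative; assumes the SumSegmentTree subclass
# defined further down, e.g. for prioritized sampling of replay transitions):
#   st = SumSegmentTree(8)
#   st[0], st[3] = 0.5, 1.5       # write two priorities
#   st.sum()                      # -> 2.0
#   st.find_prefixsum_idx(0.6)    # -> 3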
29 | self._capacity = capacity 30 | self._value = [neutral_element for _ in range(2 * capacity)] 31 | self._operation = operation 32 | 33 | def _reduce_helper(self, start, end, node, node_start, node_end): 34 | if start == node_start and end == node_end: 35 | return self._value[node] 36 | mid = (node_start + node_end) // 2 37 | if end <= mid: 38 | return self._reduce_helper(start, end, 2 * node, node_start, mid) 39 | else: 40 | if mid + 1 <= start: 41 | return self._reduce_helper(start, end, 2 * node + 1, mid + 1, node_end) 42 | else: 43 | return self._operation( 44 | self._reduce_helper(start, mid, 2 * node, node_start, mid), 45 | self._reduce_helper(mid + 1, end, 2 * node + 1, mid + 1, node_end) 46 | ) 47 | 48 | def reduce(self, start=0, end=None): 49 | """Returns result of applying `self.operation` 50 | to a contiguous subsequence of the array. 51 | self.operation(arr[start], operation(arr[start+1], operation(... arr[end]))) 52 | Parameters 53 | ---------- 54 | start: int 55 | beginning of the subsequence 56 | end: int 57 | end of the subsequences 58 | Returns 59 | ------- 60 | reduced: obj 61 | result of reducing self.operation over the specified range of array elements. 62 | """ 63 | if end is None: 64 | end = self._capacity 65 | if end < 0: 66 | end += self._capacity 67 | end -= 1 68 | return self._reduce_helper(start, end, 1, 0, self._capacity - 1) 69 | 70 | def __setitem__(self, idx, val): 71 | # index of the leaf 72 | idx += self._capacity 73 | self._value[idx] = val 74 | idx //= 2 75 | while idx >= 1: 76 | self._value[idx] = self._operation( 77 | self._value[2 * idx], 78 | self._value[2 * idx + 1] 79 | ) 80 | idx //= 2 81 | 82 | def __getitem__(self, idx): 83 | assert 0 <= idx < self._capacity 84 | return self._value[self._capacity + idx] 85 | 86 | 87 | class SumSegmentTree(SegmentTree): 88 | def __init__(self, capacity): 89 | super(SumSegmentTree, self).__init__( 90 | capacity=capacity, 91 | operation=operator.add, 92 | neutral_element=0.0 93 | ) 94 | 95 | def sum(self, start=0, end=None): 96 | """Returns arr[start] + ... + arr[end]""" 97 | return super(SumSegmentTree, self).reduce(start, end) 98 | 99 | def find_prefixsum_idx(self, prefixsum): 100 | """Find the highest index `i` in the array such that 101 | sum(arr[0] + arr[1] + ... + arr[i - i]) <= prefixsum 102 | if array values are probabilities, this function 103 | allows to sample indexes according to the discrete 104 | probability efficiently. 
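Illustrative example: with leaf values [0.3, 0.2, 0.5, 0.0],
find_prefixsum_idx(0.4) returns 1, since 0.3 <= 0.4 < 0.3 + 0.2.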
105 | Parameters 106 | ---------- 107 | perfixsum: float 108 | upperbound on the sum of array prefix 109 | Returns 110 | ------- 111 | idx: int 112 | highest index satisfying the prefixsum constraint 113 | """ 114 | assert 0 <= prefixsum <= self.sum() + 1e-5 115 | idx = 1 116 | while idx < self._capacity: # while non-leaf 117 | if self._value[2 * idx] > prefixsum: 118 | idx = 2 * idx 119 | else: 120 | prefixsum -= self._value[2 * idx] 121 | idx = 2 * idx + 1 122 | return idx - self._capacity 123 | 124 | 125 | class MinSegmentTree(SegmentTree): 126 | def __init__(self, capacity): 127 | super(MinSegmentTree, self).__init__( 128 | capacity=capacity, 129 | operation=min, 130 | neutral_element=float('inf') 131 | ) 132 | 133 | def min(self, start=0, end=None): 134 | """Returns min(arr[start], ..., arr[end])""" 135 | 136 | return super(MinSegmentTree, self).reduce(start, end) 137 | -------------------------------------------------------------------------------- /RL/rnn_models.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | import find_mxnet 3 | import mxnet as mx 4 | 5 | LSTMParam = namedtuple("LSTMParam", ["i2h_weight", "i2h_bias", 6 | "h2h_weight", "h2h_bias"]) 7 | 8 | def get_lstm_cell(i): 9 | return LSTMParam(i2h_weight = mx.sym.Variable("l%d_i2h_weight" % i), 10 | i2h_bias = mx.sym.Variable("l%d_i2h_bias" % i), 11 | h2h_weight = mx.sym.Variable("l%d_h2h_weight" % i), 12 | h2h_bias = mx.sym.Variable("l%d_h2h_bias" % i)) 13 | 14 | def lstm(num_hidden, indata, prev_h, prev_c, param, seqidx, layeridx, dropout=0.): 15 | """LSTM Cell symbol""" 16 | if dropout > 0.: 17 | indata = mx.sym.Dropout(data=indata, p=dropout) 18 | i2h = mx.sym.FullyConnected(data=indata, 19 | weight=param.i2h_weight, 20 | bias=param.i2h_bias, 21 | num_hidden=num_hidden * 4, 22 | name="t%d_l%d_i2h" % (seqidx, layeridx)) 23 | h2h = mx.sym.FullyConnected(data=prev_h, 24 | weight=param.h2h_weight, 25 | bias=param.h2h_bias, 26 | num_hidden=num_hidden * 4, 27 | name="t%d_l%d_h2h" % (seqidx, layeridx)) 28 | gates = i2h + h2h 29 | slice_gates = mx.sym.SliceChannel(gates, num_outputs=4, 30 | name="t%d_l%d_slice" % (seqidx, layeridx)) 31 | in_gate = mx.sym.Activation(slice_gates[0], act_type="sigmoid") 32 | in_transform = mx.sym.Activation(slice_gates[1], act_type="tanh") 33 | forget_gate = mx.sym.Activation(slice_gates[2], act_type="sigmoid") 34 | out_gate = mx.sym.Activation(slice_gates[3], act_type="sigmoid") 35 | next_c = (forget_gate * prev_c) + (in_gate * in_transform) 36 | next_h = out_gate * mx.sym.Activation(next_c, act_type="tanh") 37 | return next_h, next_c 38 | 39 | 40 | GRUParam = namedtuple("GRUParam", ["gates_i2h_weight", "gates_i2h_bias", 41 | "gates_h2h_weight", "gates_h2h_bias", 42 | "trans_i2h_weight", "trans_i2h_bias", 43 | "trans_h2h_weight", "trans_h2h_bias"]) 44 | 45 | def get_gru_cell(i): 46 | return GRUParam(gates_i2h_weight=mx.sym.Variable("l%d_i2h_gates_weight" % i), 47 | gates_i2h_bias=mx.sym.Variable("l%d_i2h_gates_bias" % i), 48 | gates_h2h_weight=mx.sym.Variable("l%d_h2h_gates_weight" % i), 49 | gates_h2h_bias=mx.sym.Variable("l%d_h2h_gates_bias" % i), 50 | trans_i2h_weight=mx.sym.Variable("l%d_i2h_trans_weight" % i), 51 | trans_i2h_bias=mx.sym.Variable("l%d_i2h_trans_bias" % i), 52 | trans_h2h_weight=mx.sym.Variable("l%d_h2h_trans_weight" % i), 53 | trans_h2h_bias=mx.sym.Variable("l%d_h2h_trans_bias" % i)) 54 | 55 | def gru(num_hidden, indata, prev_h, param, seqidx, layeridx, dropout=0.): 56 | """ 57 | GRU Cell symbol 58 | 
Reference: 59 | * Chung, Junyoung, et al. "Empirical evaluation of gated recurrent neural 60 | networks on sequence modeling." arXiv preprint arXiv:1412.3555 (2014). 61 | """ 62 | if dropout > 0.: 63 | indata = mx.sym.Dropout(data=indata, p=dropout) 64 | i2h = mx.sym.FullyConnected(data=indata, 65 | weight=param.gates_i2h_weight, 66 | bias=param.gates_i2h_bias, 67 | num_hidden=num_hidden * 2, 68 | name="t%d_l%d_gates_i2h" % (seqidx, layeridx)) 69 | h2h = mx.sym.FullyConnected(data=prev_h, 70 | weight=param.gates_h2h_weight, 71 | bias=param.gates_h2h_bias, 72 | num_hidden=num_hidden * 2, 73 | name="t%d_l%d_gates_h2h" % (seqidx, layeridx)) 74 | gates = i2h + h2h 75 | slice_gates = mx.sym.SliceChannel(gates, num_outputs=2, 76 | name="t%d_l%d_slice" % (seqidx, layeridx)) 77 | update_gate = mx.sym.Activation(slice_gates[0], act_type="sigmoid") 78 | reset_gate = mx.sym.Activation(slice_gates[1], act_type="sigmoid") 79 | # The transform part of GRU is a little magic 80 | htrans_i2h = mx.sym.FullyConnected(data=indata, 81 | weight=param.trans_i2h_weight, 82 | bias=param.trans_i2h_bias, 83 | num_hidden=num_hidden, 84 | name="t%d_l%d_trans_i2h" % (seqidx, layeridx)) 85 | h_after_reset = prev_h * reset_gate 86 | htrans_h2h = mx.sym.FullyConnected(data=h_after_reset, 87 | weight=param.trans_h2h_weight, 88 | bias=param.trans_h2h_bias, 89 | num_hidden=num_hidden, 90 | name="t%d_l%d_trans_i2h" % (seqidx, layeridx)) 91 | h_trans = htrans_i2h + htrans_h2h 92 | h_trans_active = mx.sym.Activation(h_trans, act_type="tanh") 93 | next_h = prev_h + update_gate * (h_trans_active - prev_h) 94 | return next_h 95 | -------------------------------------------------------------------------------- /RL/symbol_inception-bn.py: -------------------------------------------------------------------------------- 1 | """ 2 | 3 | Inception + BN, suitable for images with around 224 x 224 4 | 5 | Reference: 6 | 7 | Sergey Ioffe and Christian Szegedy. Batch normalization: Accelerating deep 8 | network training by reducing internal covariate shift. arXiv preprint 9 | arXiv:1502.03167, 2015. 
10 | 11 | """ 12 | 13 | import find_mxnet 14 | import mxnet as mx 15 | 16 | eps = 1e-10 + 1e-5 17 | bn_mom = 0.9 18 | #fix_gamma_flag = False 19 | 20 | 21 | def ConvFactory(data, num_filter, kernel, stride=(1,1), pad=(0, 0), name=None, suffix=''): 22 | conv = mx.symbol.Convolution(data=data, num_filter=num_filter, kernel=kernel, stride=stride, pad=pad, name='conv_%s%s' %(name, suffix)) 23 | bn = mx.symbol.BatchNorm(data=conv, fix_gamma=fix_gamma_flag, eps=eps, momentum=bn_mom, use_global_stats=global_stats_flag, name='bn_%s%s' %(name, suffix)) 24 | act = mx.symbol.Activation(data=bn, act_type='relu', name='relu_%s%s' %(name, suffix)) 25 | return act 26 | 27 | def InceptionFactoryA(data, num_1x1, num_3x3red, num_3x3, num_d3x3red, num_d3x3, pool, proj, name): 28 | # 1x1 29 | c1x1 = ConvFactory(data=data, num_filter=num_1x1, kernel=(1, 1), name=('%s_1x1' % name)) 30 | # 3x3 reduce + 3x3 31 | c3x3r = ConvFactory(data=data, num_filter=num_3x3red, kernel=(1, 1), name=('%s_3x3' % name), suffix='_reduce') 32 | c3x3 = ConvFactory(data=c3x3r, num_filter=num_3x3, kernel=(3, 3), pad=(1, 1), name=('%s_3x3' % name)) 33 | # double 3x3 reduce + double 3x3 34 | cd3x3r = ConvFactory(data=data, num_filter=num_d3x3red, kernel=(1, 1), name=('%s_double_3x3' % name), suffix='_reduce') 35 | cd3x3 = ConvFactory(data=cd3x3r, num_filter=num_d3x3, kernel=(3, 3), pad=(1, 1), name=('%s_double_3x3_0' % name)) 36 | cd3x3 = ConvFactory(data=cd3x3, num_filter=num_d3x3, kernel=(3, 3), pad=(1, 1), name=('%s_double_3x3_1' % name)) 37 | # pool + proj 38 | pooling = mx.symbol.Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool, name=('%s_pool_%s_pool' % (pool, name))) 39 | cproj = ConvFactory(data=pooling, num_filter=proj, kernel=(1, 1), name=('%s_proj' % name)) 40 | # concat 41 | concat = mx.symbol.Concat(*[c1x1, c3x3, cd3x3, cproj], name='ch_concat_%s_chconcat' % name) 42 | return concat 43 | 44 | def InceptionFactoryB(data, num_3x3red, num_3x3, num_d3x3red, num_d3x3, name): 45 | # 3x3 reduce + 3x3 46 | c3x3r = ConvFactory(data=data, num_filter=num_3x3red, kernel=(1, 1), name=('%s_3x3' % name), suffix='_reduce') 47 | c3x3 = ConvFactory(data=c3x3r, num_filter=num_3x3, kernel=(3, 3), pad=(1, 1), stride=(2, 2), name=('%s_3x3' % name)) 48 | # double 3x3 reduce + double 3x3 49 | cd3x3r = ConvFactory(data=data, num_filter=num_d3x3red, kernel=(1, 1), name=('%s_double_3x3' % name), suffix='_reduce') 50 | cd3x3 = ConvFactory(data=cd3x3r, num_filter=num_d3x3, kernel=(3, 3), pad=(1, 1), stride=(1, 1), name=('%s_double_3x3_0' % name)) 51 | cd3x3 = ConvFactory(data=cd3x3, num_filter=num_d3x3, kernel=(3, 3), pad=(1, 1), stride=(2, 2), name=('%s_double_3x3_1' % name)) 52 | # pool + proj 53 | pooling = mx.symbol.Pooling(data=data, kernel=(3, 3), stride=(2, 2), pad=(1, 1), pool_type="max", name=('max_pool_%s_pool' % name)) 54 | # concat 55 | concat = mx.symbol.Concat(*[c3x3, cd3x3, pooling], name='ch_concat_%s_chconcat' % name) 56 | return concat 57 | 58 | def get_symbol(data, num_classes=1000, fix_gamma=False, global_stats=False): 59 | global fix_gamma_flag, global_stats_flag 60 | fix_gamma_flag, global_stats_flag = fix_gamma, global_stats 61 | # data 62 | #data = mx.symbol.Variable(name="data") 63 | # stage 1 64 | conv1 = ConvFactory(data=data, num_filter=64, kernel=(7, 7), stride=(2, 2), pad=(3, 3), name='1') 65 | pool1 = mx.symbol.Pooling(data=conv1, kernel=(3, 3), stride=(2, 2), name='pool_1', pool_type='max') 66 | # stage 2 67 | conv2red = ConvFactory(data=pool1, num_filter=64, kernel=(1, 1), stride=(1, 1), 
name='2_red') 68 | conv2 = ConvFactory(data=conv2red, num_filter=192, kernel=(3, 3), stride=(1, 1), pad=(1, 1), name='2') 69 | pool2 = mx.symbol.Pooling(data=conv2, kernel=(3, 3), stride=(2, 2), name='pool_2', pool_type='max') 70 | pool2 = mx.symbol.BlockGrad(pool2, name='block_stage2') 71 | # stage 2 72 | in3a = InceptionFactoryA(pool2, 64, 64, 64, 64, 96, "avg", 32, '3a') 73 | in3b = InceptionFactoryA(in3a, 64, 64, 96, 64, 96, "avg", 64, '3b') 74 | in3c = InceptionFactoryB(in3b, 128, 160, 64, 96, '3c') 75 | # stage 3 76 | in4a = InceptionFactoryA(in3c, 224, 64, 96, 96, 128, "avg", 128, '4a') 77 | in4b = InceptionFactoryA(in4a, 192, 96, 128, 96, 128, "avg", 128, '4b') 78 | in4c = InceptionFactoryA(in4b, 160, 128, 160, 128, 160, "avg", 128, '4c') 79 | in4d = InceptionFactoryA(in4c, 96, 128, 192, 160, 192, "avg", 128, '4d') 80 | in4e = InceptionFactoryB(in4d, 128, 192, 192, 256, '4e') 81 | # stage 4 82 | in5a = InceptionFactoryA(in4e, 352, 192, 320, 160, 224, "avg", 128, '5a') 83 | in5b = InceptionFactoryA(in5a, 352, 192, 320, 192, 224, "max", 128, '5b') 84 | #in5b = mx.symbol.BlockGrad(in5b, name='block_in5b') 85 | # global avg pooling 86 | #avg = mx.symbol.Pooling(data=in5b, kernel=(7, 7), stride=(1, 1), name="global_pool", pool_type='avg') 87 | # linear classifier 88 | #flatten = mx.symbol.Flatten(data=avg, name='flatten') 89 | #fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=num_classes, name='fc1') 90 | #softmax = mx.symbol.SoftmaxOutput(data=fc1, name='softmax') 91 | return in5b 92 | 93 | 94 | if __name__ == '__main__': 95 | sym = get_symbol() 96 | mx.viz.print_summary(sym, {'data': (1, 3, 128, 64)}) 97 | -------------------------------------------------------------------------------- /baseline/even_iterator.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import numpy as np 3 | import cv2 4 | import random 5 | 6 | 7 | class Even_iterator(mx.io.DataIter): 8 | ''' 9 | data iterator, shuffle data but always make pairs as neighbors 10 | for verification and triplet loss 11 | ''' 12 | def __init__(self, lst_name, batch_size, aug_params=dict(), shuffle=False): 13 | super(Even_iterator, self).__init__() 14 | self.batch_size = batch_size 15 | self.aug_params = aug_params.copy() 16 | self.shuffle = shuffle 17 | 18 | self.data, self.labels = Even_iterator.load_data(lst_name) 19 | print "load data over" 20 | self.data_num = self.labels.shape[0] 21 | self.label_num = 1 if len(self.labels.shape) == 1 else self.labels.shape[1] 22 | print self.data_num, self.label_num 23 | self.reset() 24 | 25 | @staticmethod 26 | def load_data(lst_name): 27 | img_lst = [x.strip().split('\t') 28 | for x in file(lst_name).read().splitlines()] 29 | im = cv2.imread(img_lst[0][-1]) 30 | h, w = im.shape[:2] 31 | n, m = len(img_lst), len(img_lst[0]) - 2 32 | data = np.zeros((n, h, w, 3), dtype=np.uint8) 33 | labels = np.zeros((n, m), dtype=np.int32) if m > 1 else np.zeros((n, ), dtype=np.int32) 34 | 35 | for i in range(len(img_lst)): 36 | im = cv2.imread(img_lst[i][-1]) 37 | 38 | data[i] = im 39 | labels[i] = img_lst[i][1:-1] if m > 1 else img_lst[i][1] 40 | 41 | return data, labels 42 | 43 | @staticmethod 44 | def even_shuffle(labels): 45 | ''' 46 | shuffle images lists and make pairs 47 | ''' 48 | s = [(x, int(random.random() * 1e5), i) for i, x in enumerate(labels)] 49 | s = sorted(s, key=lambda x: (x[0], x[1])) 50 | lst = [x[2] for x in s] 51 | 52 | idx = range(0, len(lst), 2) 53 | random.shuffle(idx) 54 | ret = [] 55 | for i in idx: 56 | 
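# lst is grouped by label, so indices i and i+1 form a same-identity pair
# (each identity contributes an even number of entries); emitting them
# together keeps pairs adjacent after shuffling.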
ret.append(lst[i]) 57 | ret.append(lst[i + 1]) 58 | 59 | return ret 60 | 61 | @staticmethod 62 | def model_shuffle(labels): 63 | ''' 64 | shuffle images and images with same model are grouped together 65 | ''' 66 | models_idx = range(int(np.max(labels)) + 1) 67 | random.shuffle(models_idx) 68 | s = [(models_idx[x], int(random.random() * 1e5), i) for i, x in enumerate(labels)] 69 | s = sorted(s, key=lambda x: (x[0], x[1])) 70 | lst = [x[2] for x in s] 71 | 72 | return lst 73 | 74 | def reset(self): 75 | self.current = 0 76 | if self.shuffle: 77 | idx = Even_iterator.even_shuffle(self.labels) 78 | # idx = Even_iterator.model_shuffle(self.labels) 79 | self.data = self.data[idx] 80 | self.labels = self.labels[idx] 81 | 82 | @property 83 | def provide_data(self): 84 | shape = self.aug_params['input_shape'] 85 | 86 | return [('data', (self.batch_size, shape[0], shape[1], shape[2]))] 87 | 88 | @property 89 | def provide_label(self): 90 | return [('softmax_label', (self.batch_size, self.label_num))] 91 | 92 | @staticmethod 93 | def augment(im, aug_params): 94 | ''' 95 | augmentation (resize, crop, mirror) 96 | ''' 97 | crop_h, crop_w = aug_params['input_shape'][1:] 98 | ori_h, ori_w = im.shape[:2] 99 | resize = aug_params['resize'] 100 | if ori_h < ori_w: 101 | h, w = resize, int(float(resize) / ori_h * ori_w) 102 | else: 103 | h, w = int(float(resize) / ori_w * ori_h), resize 104 | 105 | if h != ori_h: 106 | im = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR) 107 | 108 | x, y = (w - crop_w) / 2, (h - crop_h) / 2 109 | if aug_params['rand_crop']: 110 | x = random.randint(0, w - crop_w) 111 | y = random.randint(0, h - crop_h) 112 | im = im[y:y + crop_h, x:x + crop_w, :] 113 | 114 | # cv2.imshow("name", im.astype(np.uint8)) 115 | # cv2.waitKey() 116 | 117 | im = np.transpose(im, (2, 0, 1)) 118 | newim = np.zeros_like(im) 119 | newim[0] = im[2] 120 | newim[1] = im[1] 121 | newim[2] = im[0] 122 | 123 | if aug_params['rand_mirror'] and random.randint(0, 1) == 1: 124 | newim = newim[:, :, ::-1] 125 | 126 | return newim 127 | 128 | def next(self): 129 | if self.current + self.batch_size > self.data_num: 130 | raise StopIteration 131 | 132 | shape = self.aug_params['input_shape'] 133 | x = np.zeros((self.batch_size, shape[0], shape[1], shape[2])) 134 | y = np.zeros((self.batch_size, self.label_num) if self.label_num > 1 135 | else (self.batch_size, )) 136 | index = [] 137 | for i in range(self.current, self.current + self.batch_size): 138 | im = self.data[i] 139 | im.astype(np.float32) 140 | im = Even_iterator.augment(im, self.aug_params) 141 | x[i - self.current] = im 142 | y[i - self.current] = self.labels[i] 143 | index.append(i) 144 | 145 | x -= self.aug_params['mean'] 146 | 147 | x = mx.nd.array(x) 148 | label = mx.nd.array(y) 149 | 150 | batch = mx.io.DataBatch(data=[x], label=[label], pad=0, index=index) 151 | self.current += self.batch_size 152 | 153 | return batch 154 | -------------------------------------------------------------------------------- /RL/mars_test.py: -------------------------------------------------------------------------------- 1 | import sys 2 | #sys.path.insert(0, "mxnet/python/") 3 | import find_mxnet 4 | import mxnet as mx 5 | import numpy as np 6 | import argparse 7 | import random 8 | 9 | 10 | from symbols import sym_base_net, sym_DQN 11 | from utils import get_imRecordIter, load_checkpoint 12 | 13 | 14 | cmcs = [1, 5, 10, 20] 15 | cn = 4 16 | 17 | 18 | def norm_cnts(cnts, cnt): 19 | return [cnts[i] / cnt[0] for i in xrange(cn)] 20 | 21 | 22 | def update_cnts(cur, 
cnts, cnt): 23 | for j in xrange(cn): 24 | if cur < cmcs[j]: 25 | cnts[j] += 1.0 26 | cnt[0] += 1.0 27 | 28 | 29 | def parse_args(): 30 | parser = argparse.ArgumentParser( 31 | description='single domain car recog training') 32 | parser.add_argument('--gpus', type=str, default='0', 33 | help='the gpus will be used, e.g "0,1"') 34 | parser.add_argument('--model-load-epoch', type=int, default=3, 35 | help='load the model on an epoch using the model-load-prefix') 36 | parser.add_argument('--model-load-prefix', type=str, default='mars-TEST-DQN_test-2017.11.15-10.51.56-bs4-ss8-incp_prep__nobg_noregQv_block2_f2_nofus0-2_poscontra_fne0.1-1-1_tisr1-sgd_t500-_qg0.9-up0.2-vtd4.0-_lr1e1-_32-1024-_na3-3', 37 | help='load model prefix') 38 | return parser.parse_args() 39 | 40 | 41 | def create_module(ctx, seq_len, is_train=False): 42 | net = sym_DQN(args, is_train=is_train, num_acts=args.num_acts, bn=False, global_stats=False, no_his=True) 43 | mod = mx.mod.Module(symbol=net, data_names=('data1', 'data2'), label_names=None, 44 | fixed_param_names=['data1', 'data2'], context=ctx) 45 | mod.bind(data_shapes=[('data1', (seq_len, 1024)), ('data2', (seq_len, 1024)),], 46 | for_training=is_train, inputs_need_grad=False) 47 | return mod 48 | 49 | 50 | def dist(a, b): 51 | diff = a - b 52 | return mx.nd.sum(diff*diff).asnumpy()[0] 53 | 54 | 55 | def copyto(x): 56 | return x.copyto(x.context) 57 | 58 | 59 | def get_train_args(name): 60 | fn = open('log/%s.log'%name) 61 | s = fn.readline()[10:] 62 | fn.close() 63 | s = 'ret=argparse.' + s 64 | exec(s) 65 | return ret 66 | 67 | test_args = parse_args() 68 | print 'test arg:', test_args 69 | devices = [mx.gpu(int(i)) for i in test_args.gpus.split(',')] 70 | 71 | model_path = 'models' 72 | 73 | args = get_train_args(test_args.model_load_prefix) 74 | print 'train arg:', args 75 | 76 | model_path = 'models' 77 | arg_params, aux_params = load_checkpoint( 78 | '%s/%s' % (model_path, test_args.model_load_prefix), test_args.model_load_epoch) 79 | base_mod = mx.mod.Module(symbol=sym_base_net(args.network, is_test=True), data_names=('data',), label_names=None, context=devices) 80 | base_mod.bind(data_shapes=[('data', (1024, 3, 224, 112))], for_training=False) 81 | base_mod.init_params(initializer=None, arg_params=arg_params,aux_params=aux_params,force_init=True) 82 | 83 | dataiter = get_imRecordIter( 84 | args, 'recs/eval_test', (3, 224, 112), 1024, 85 | shuffle=False, aug=False, even_iter=True) 86 | dataiter.reset() 87 | F = base_mod.predict(dataiter) 88 | del dataiter 89 | print 'l2' 90 | print F 91 | 92 | print 'base feat predicted' 93 | query = np.loadtxt('/data3/matt/MARS/MARS-evaluation/info/query.csv', delimiter=',').astype(int) 94 | gallery = np.loadtxt('/data3/matt/MARS/MARS-evaluation/info/gallery.csv', delimiter=',').astype(int) 95 | 96 | cnts, cnt = [0, 0, 0, 0], [0] 97 | max_turn, tot_ava = args.sample_size, 1 98 | 99 | query[:, 2] -= 1 100 | gallery[:, 0] -= 1 101 | 102 | 103 | 104 | def get_data(a): 105 | return F[random.randrange(gallery[a, 0], gallery[a, 1])] 106 | 107 | P = mx.nd.zeros((gallery.shape[0], args.sample_size, F.shape[1]),ctx=devices[0]) 108 | for a in xrange(gallery.shape[0]): 109 | j, camj = gallery[a, 2:] 110 | if j == -1 or gallery[a, 0] == gallery[a, 1]: 111 | continue 112 | cur = mx.nd.zeros((args.sample_size, F.shape[1]),ctx=devices[0]) 113 | for k in xrange(args.sample_size): 114 | cur[k] = get_data(a) 115 | P[a] = copyto(cur) 116 | 117 | data1 = mx.symbol.Variable(name="data1") 118 | data2 = mx.symbol.Variable(name="data2") 119 | Qsym 
= sym_DQN(data1, data2, args.num_sim, args.num_hidden, is_train=False, num_acts=args.num_acts, min_states=args.min_states, min_imgs=args.min_imgs, fusion=args.fusion, bn=args.q_bn, global_stats=False, no_his=False) 120 | Q = mx.mod.Module(symbol=Qsym, data_names=('data1', 'data2'), label_names=None, context=devices[0]) 121 | Q.bind(data_shapes=[('data1', (args.sample_size, F.shape[1])), 122 | ('data2', (args.sample_size, F.shape[1]))], 123 | for_training=False) 124 | Q.init_params(initializer=None, arg_params=arg_params,aux_params=aux_params,force_init=True, allow_missing=False) 125 | cnts, cnt = [0, 0, 0, 0], [0] 126 | pc, ps = 0, 0 127 | hists = [0 for _ in xrange(args.sample_size)] 128 | for q in xrange(query.shape[0]): 129 | i, cam, idx = query[q] 130 | if gallery[idx, 0] == gallery[idx, 1]: 131 | continue 132 | d = [] 133 | for a in xrange(gallery.shape[0]): 134 | j, camj = gallery[a, 2:] 135 | if j == i and camj == cam or j == -1 or gallery[a, 0] == gallery[a, 1]: 136 | continue 137 | if random.random() > 0.01: 138 | continue 139 | Q.forward(mx.io.DataBatch([P[idx], P[a]], []), is_train=False) 140 | Qvalues = Q.get_outputs()[0].asnumpy() 141 | for k in xrange(args.sample_size): 142 | if Qvalues[k, 2] < Qvalues[k, 0] or Qvalues[k, 2] < Qvalues[k, 1] or k == args.sample_size - 1: 143 | d.append((Qvalues[k, 0] - Qvalues[k, 1], j)) 144 | ps += k + 1 145 | pc += 1 146 | hists[k] += 1 147 | break 148 | 149 | d = sorted(d) 150 | cur = 0 151 | for a in xrange(len(d)): 152 | j = d[a][1] 153 | if j == i: 154 | break 155 | else: 156 | cur += 1 157 | update_cnts(cur, cnts, cnt) 158 | print q, i, cam, idx, cur, norm_cnts(cnts, cnt), ps * 1.0 / pc -------------------------------------------------------------------------------- /baseline/baseline_test.py: -------------------------------------------------------------------------------- 1 | import sys 2 | #sys.path.insert(0, "mxnet/python/") 3 | import find_mxnet 4 | import mxnet as mx 5 | import numpy as np 6 | import argparse 7 | import random 8 | import importlib 9 | 10 | from utils import get_imRecordIter, load_checkpoint 11 | from sklearn.metrics import average_precision_score 12 | 13 | 14 | cmcs = [1, 5, 10, 20] 15 | cn = 4 16 | 17 | 18 | def norm_cnts(cnts, cnt): 19 | return [cnts[i] / cnt[0] for i in xrange(cn)] 20 | 21 | 22 | def update_cnts(d, cnts, cnt, N, i): 23 | #r = np.argsort(d) 24 | dcur = d[i] 25 | cur, pre = 0, [] 26 | for j in xrange(N): 27 | if d[j] <= d[i] and not j == i: 28 | cur += 1 29 | pre.append(j) 30 | for j in xrange(cn): 31 | if cur < cmcs[j]: 32 | cnts[j] += 1.0 33 | cnt[0] += 1.0 34 | return pre 35 | 36 | 37 | def parse_args(): 38 | parser = argparse.ArgumentParser( 39 | description='single domain car recog training') 40 | parser.add_argument('--gpus', type=str, default='6', 41 | help='the gpus will be used, e.g "0,1"') 42 | parser.add_argument('--data-dir', type=str, 43 | default='/data3/matt/iLIDS-VID/recs',#"/data3/matt/prid_2011/recs",# 44 | help='data directory') 45 | parser.add_argument('--num-examples', type=int, default=10000, 46 | help='the number of training examples') 47 | parser.add_argument('--num-id', type=int, default=100, 48 | help='the number of training ids') 49 | parser.add_argument('--batch-size', type=int, default=512, 50 | help='the batch size') 51 | parser.add_argument('--base-model-load-epoch', type=int, default=1, 52 | help='load the model on an epoch using the model-load-prefix') 53 | parser.add_argument('--base-model-load-prefix', type=str, default='ilds_baseline', 54 | help='load model 
prefix') 55 | parser.add_argument('--dataset', type=str, default='image_test', 56 | help='dataset (test/query)') 57 | parser.add_argument('--network', type=str, 58 | #default='alexnet', help='network name') 59 | default='inception-bn', help='network name') 60 | return parser.parse_args() 61 | 62 | 63 | def build_base_net(args, is_train=False, global_stats=False): 64 | ''' 65 | network structure 66 | ''' 67 | symbol = importlib.import_module('symbol_' + args.network).get_symbol() 68 | # concat = internals["ch_concat_5b_chconcat_output"] 69 | #symbol = mx.symbol.Dropout(data=symbol, name='dropout1') 70 | pooling = mx.symbol.Pooling( 71 | data=symbol, kernel=(1, 1), global_pool=True, 72 | pool_type='avg', name='global_pool') 73 | flatten = mx.symbol.Flatten(data=pooling, name='flatten') 74 | l2 = mx.symbol.L2Normalization(data=flatten, name='l2_norm') 75 | return l2 76 | 77 | args = parse_args() 78 | print args 79 | devices = [mx.gpu(int(i)) for i in args.gpus.split(',')] 80 | batch_size = args.batch_size 81 | model_path = 'models' 82 | 83 | def dist(a, b): 84 | diff = a - b 85 | return mx.nd.sum(diff*diff).asnumpy()[0] 86 | 87 | def copyto(x): 88 | return x.copyto(x.context) 89 | 90 | cnts_g, cnt_g = [0, 0, 0, 0], [0] 91 | max_turn, gc = 10, 0 92 | max_frames = max_turn 93 | cmc1 = np.zeros(max_frames) 94 | MAP = np.zeros(max_frames) 95 | results = [] 96 | for sets in xrange(10): 97 | arg_params, aux_params = load_checkpoint( 98 | '%s/%s_%d' % (model_path, args.base_model_load_prefix, sets), args.base_model_load_epoch) 99 | base_mod = mx.mod.Module(symbol=build_base_net(args), data_names=('data', ), label_names=None, context=devices) 100 | base_mod.bind(data_shapes=[('data', (args.batch_size, 3, 224, 112))], for_training=False) 101 | base_mod.init_params(initializer=None, arg_params=arg_params,aux_params=aux_params,force_init=True) 102 | 103 | dataiter = get_imRecordIter( 104 | args, '%s%d' % (args.dataset, sets), (3, 224, 112), args.batch_size, 105 | shuffle=False, aug=False, even_iter=False) 106 | 107 | dataiter.reset() 108 | 109 | output = base_mod.predict(dataiter) 110 | F = output 111 | F2 = F 112 | print F.shape 113 | 114 | cnt_lst = np.loadtxt(args.data_dir + '/' + 'image_test' + str(sets) + '.txt').astype(int) 115 | N = cnt_lst.shape[0] / 2 116 | 117 | avp = [] 118 | for i in xrange(N + N): 119 | for j in xrange(cnt_lst[i], cnt_lst[i + 1]): 120 | if j == cnt_lst[i]: 121 | g = copyto(F[j]) 122 | else: 123 | g += F[j] 124 | avp.append(g / mx.nd.sqrt(mx.nd.sum(g * g))) 125 | 126 | cnts, cnt = [0, 0, 0, 0], [0] 127 | 128 | for i in xrange(N+N): 129 | d = [] 130 | a = i % N 131 | scores = [] 132 | label = np.array([(1 if _ == a else 0) for _ in xrange(N)]) 133 | for j in xrange(N): 134 | d.append(dist(avp[i], avp[j if i >= N else (j + N)])) 135 | g, x, y = [], mx.nd.zeros((int(F.shape[1])),ctx=devices[0]), mx.nd.zeros((int(F.shape[1])),ctx=devices[0]) 136 | for k in xrange(max_frames): 137 | if i < N: 138 | x += F2[random.randrange(cnt_lst[i], cnt_lst[i+1])] 139 | y += F2[random.randrange(cnt_lst[j+N], cnt_lst[j+1+N])] 140 | else: 141 | x += F2[random.randrange(cnt_lst[i], cnt_lst[i+1])] 142 | y += F2[random.randrange(cnt_lst[j], cnt_lst[j+1])] 143 | g.append(dist(x/(k+1), y/(k+1))) 144 | scores.append(g) 145 | scores = np.array(scores) 146 | for j in xrange(max_frames): 147 | MAP[j] += average_precision_score(label, -scores[:, j]) 148 | if min(scores[:, j]) == scores[a, j]: 149 | cmc1[j] += 1 150 | gc += 1 151 | print gc 152 | print MAP[:10] / gc 153 | print cmc1[:10] / gc 154 | 
update_cnts(d, cnts, cnt, N, i if i < N else i - N) 155 | update_cnts(d, cnts_g, cnt_g, N, i if i < N else i - N) 156 | print i, norm_cnts(cnts, cnt), norm_cnts(cnts_g, cnt_g) 157 | results.append((cnts, cnt)) -------------------------------------------------------------------------------- /RL/img_lib.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import numpy as np 3 | import cv2 4 | import random 5 | 6 | 7 | class ImgLibrary(mx.io.DataIter): 8 | ''' 9 | Load images from disk/memory 10 | ''' 11 | def __init__(self, lst_name, batch_size, aug_params=dict(), shuffle=False, data_dir=''): 12 | super(ImgLibrary, self).__init__() 13 | self.batch_size = batch_size 14 | self.aug_params = aug_params.copy() 15 | self.shuffle = shuffle 16 | self.data_dir = data_dir 17 | 18 | self.data, self.labels = ImgLibrary.load_data(lst_name, data_dir) 19 | #print "load data over" 20 | self.data_num = self.labels.shape[0] 21 | self.label_num = 1 if len(self.labels.shape) == 1 else self.labels.shape[1] 22 | #print self.data_num, self.label_num 23 | self.reset() 24 | 25 | @staticmethod 26 | def load_data(lst_name, data_dir): 27 | img_lst = [x.strip().split('\t') 28 | for x in file(lst_name).read().splitlines()] 29 | im = cv2.imread(data_dir + img_lst[0][-1]) 30 | print data_dir + img_lst[0][-1] 31 | h, w = im.shape[:2] 32 | n, m = len(img_lst), len(img_lst[0]) - 2 33 | data = []#np.zeros((n, h, w, 3), dtype=np.uint8) 34 | labels = np.zeros((n, m), dtype=np.int32) if m > 1 else np.zeros((n, ), dtype=np.int32) 35 | 36 | for i in range(len(img_lst)): 37 | #im = cv2.imread(data_dir + img_lst[i][-1]) 38 | 39 | #data[i] = im 40 | data.append(data_dir + img_lst[i][-1]) 41 | labels[i] = img_lst[i][1:-1] if m > 1 else img_lst[i][1] 42 | 43 | data = np.array(data) 44 | 45 | return data, labels 46 | 47 | @staticmethod 48 | def even_shuffle(labels): 49 | ''' 50 | shuffle images lists and make pairs 51 | ''' 52 | s = [(x, int(random.random() * 1e5), i) for i, x in enumerate(labels)] 53 | s = sorted(s, key=lambda x: (x[0], x[1])) 54 | lst = [x[2] for x in s] 55 | 56 | idx = range(0, len(lst), 2) 57 | random.shuffle(idx) 58 | ret = [] 59 | for i in idx: 60 | ret.append(lst[i]) 61 | ret.append(lst[i + 1]) 62 | 63 | return ret 64 | 65 | @staticmethod 66 | def model_shuffle(labels): 67 | ''' 68 | shuffle images and images with same model are grouped together 69 | ''' 70 | models_idx = range(int(np.max(labels)) + 1) 71 | random.shuffle(models_idx) 72 | s = [(models_idx[x], int(random.random() * 1e5), i) for i, x in enumerate(labels)] 73 | s = sorted(s, key=lambda x: (x[0], x[1])) 74 | lst = [x[2] for x in s] 75 | 76 | return lst 77 | 78 | def reset(self): 79 | self.current = 0 80 | if self.shuffle: 81 | #idx = ImgLibrary.even_shuffle(self.labels) 82 | idx = ImgLibrary.model_shuffle(self.labels) 83 | self.data = self.data[idx] 84 | self.labels = self.labels[idx] 85 | 86 | @property 87 | def provide_data(self): 88 | shape = self.aug_params['input_shape'] 89 | 90 | return [('data', (self.batch_size, shape[0], shape[1], shape[2]))] 91 | 92 | @property 93 | def provide_label(self): 94 | return [('softmax_label', (self.batch_size, self.label_num))] 95 | 96 | @staticmethod 97 | def augment(im, aug_params, aug=False): 98 | ''' 99 | augmentation (resize, crop, mirror) 100 | ''' 101 | crop_h, crop_w = aug_params['input_shape'][1:] 102 | ori_h, ori_w = im.shape[:2] 103 | resize = aug_params['resize'] 104 | if ori_h < ori_w: 105 | h, w = resize, int(float(resize) / ori_h * ori_w) 
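# either branch pins the shorter image side to `resize` and scales the longer
# side proportionally, preserving the aspect ratio before cropping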
106 | else: 107 | h, w = int(float(resize) / ori_w * ori_h), resize 108 | 109 | if h != ori_h: 110 | im = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR) 111 | 112 | x, y = (w - crop_w) / 2, (h - crop_h) / 2 113 | if aug_params['rand_crop'] and aug: 114 | x = random.randint(0, w - crop_w) 115 | y = random.randint(0, h - crop_h) 116 | im = im[y:y + crop_h, x:x + crop_w, :] 117 | 118 | # cv2.imshow("name", im.astype(np.uint8)) 119 | # cv2.waitKey() 120 | 121 | # Blur 122 | '''org = x.asnumpy() 123 | sig = random.random() * 5 124 | result = np.zeros_like(org) 125 | for i in xrange(3): 126 | result[0, i, :, :] = ndimage.gaussian_filter(org[0, i, :, :], sig) 127 | print sig 128 | import cv2 129 | cv2.imshow("name", (result[0].transpose(1, 2, 0)+128).astype(np.uint8)) 130 | cv2.waitKey() 131 | cv2.imshow("name", (org[0].transpose(1, 2, 0)+128).astype(np.uint8)) 132 | cv2.waitKey()''' 133 | 134 | im = np.transpose(im, (2, 0, 1)) 135 | newim = np.zeros_like(im) 136 | newim[0] = im[2] 137 | newim[1] = im[1] 138 | newim[2] = im[0] 139 | 140 | if aug and aug_params['rand_mirror'] and random.randint(0, 1) == 1: 141 | newim = newim[:, :, ::-1] 142 | newim -= aug_params['mean'] 143 | 144 | return newim 145 | 146 | def get_single(self, i, aug=False): 147 | im = cv2.imread(self.data[i]).astype(np.float32) 148 | im = ImgLibrary.augment(im, self.aug_params, aug) 149 | return im 150 | 151 | def next(self): 152 | #if self.current + self.batch_size > self.data_num: 153 | if self.current > self.data_num: 154 | raise StopIteration 155 | 156 | shape = self.aug_params['input_shape'] 157 | x = np.zeros((self.batch_size, shape[0], shape[1], shape[2])) 158 | y = np.zeros((self.batch_size, self.label_num) if self.label_num > 1 159 | else (self.batch_size, )) 160 | index = [] 161 | for i in range(self.current, self.current + self.batch_size): 162 | im = cv2.imread(self.data[i % self.data_num]).astype(np.float32) 163 | im = ImgLibrary.augment(im, self.aug_params) 164 | x[i - self.current] = im 165 | y[i - self.current] = self.labels[i % self.data_num] 166 | index.append(i) 167 | 168 | #x -= self.aug_params['mean'] 169 | 170 | x = mx.nd.array(x) 171 | label = mx.nd.array(y) 172 | 173 | batch = mx.io.DataBatch(data=[x], label=[label], pad=0, index=index) 174 | self.current += self.batch_size 175 | 176 | return batch -------------------------------------------------------------------------------- /RL/mars_test_baseline.py: -------------------------------------------------------------------------------- 1 | import sys 2 | #sys.path.insert(0, "mxnet/python/") 3 | import find_mxnet 4 | import mxnet as mx 5 | 6 | import numpy as np 7 | import argparse 8 | import random 9 | 10 | from symbols import sym_base_net, sym_DQN 11 | from utils import get_imRecordIter, load_checkpoint 12 | from sklearn.metrics import average_precision_score 13 | 14 | 15 | cmcs = [1, 5, 10, 20] 16 | cn = 4 17 | 18 | def norm_cnts(cnts, cnt): 19 | return [cnts[i] / cnt[0] for i in xrange(cn)] 20 | 21 | def update_cnts(cur, cnts, cnt): 22 | for j in xrange(cn): 23 | if cur < cmcs[j]: 24 | cnts[j] += 1.0 25 | cnt[0] += 1.0 26 | 27 | def parse_args(): 28 | parser = argparse.ArgumentParser( 29 | description='single domain car recog training') 30 | parser.add_argument('--gpus', type=str, default='2', 31 | help='the gpus will be used, e.g "0,1"') 32 | parser.add_argument('--data-dir', type=str, 33 | default="/data3/matt/MARS", 34 | help='data directory') 35 | parser.add_argument('--sample-size', type=int, default=8, 36 | help='sample frames from each 
video') 37 | parser.add_argument('--base-model-load-epoch', type=int, default=1, 38 | help='load the model on an epoch using the model-load-prefix') 39 | parser.add_argument('--base-model-load-prefix', type=str, default='mars_alex',#'mars_baseline_b8',# 40 | help='load model prefix') 41 | parser.add_argument('--network', type=str, 42 | default='alexnet',#'inception-bn',# 43 | help='network name') 44 | return parser.parse_args() 45 | 46 | 47 | def create_module(ctx, seq_len, is_train=False): 48 | net = sym_DQN(args, is_train=is_train, num_acts=args.num_acts, bn=False, global_stats=False, no_his=True) 49 | mod = mx.mod.Module(symbol=net, data_names=('data1', 'data2'), label_names=None, 50 | fixed_param_names=['data1', 'data2'], context=ctx) 51 | mod.bind(data_shapes=[('data1', (seq_len, 1024)), ('data2', (seq_len, 1024)),], 52 | for_training=is_train, inputs_need_grad=False) 53 | return mod 54 | 55 | def dist(a, b): 56 | diff = a - b 57 | return mx.nd.sum(diff*diff).asnumpy()[0] 58 | 59 | def copyto(x): 60 | return x.copyto(x.context) 61 | 62 | 63 | 64 | args = parse_args() 65 | print args 66 | devices = [mx.gpu(int(i)) for i in args.gpus.split(',')] 67 | 68 | batch_size = 128 69 | seq_len = batch_size 70 | model_path = 'models' 71 | random.seed(19930214) 72 | pool = set([random.randrange(1980) for _ in xrange(100)]) 73 | print pool 74 | 75 | arg_params, aux_params = load_checkpoint( 76 | '../baseline/%s/%s' % (model_path, args.base_model_load_prefix), args.base_model_load_epoch) 77 | base_mod = mx.mod.Module(symbol=sym_base_net(args.network, is_test=True), data_names=('data',), label_names=None, context=devices) 78 | base_mod.bind(data_shapes=[('data', (1024, 3, 224, 112))], for_training=False) 79 | base_mod.init_params(initializer=None, arg_params=arg_params,aux_params=aux_params,force_init=True) 80 | 81 | dataiter = get_imRecordIter( 82 | args, 'recs/eval_test', (3, 224, 112), 1024, 83 | shuffle=False, aug=False, even_iter=True) 84 | dataiter.reset() 85 | F = base_mod.predict(dataiter) 86 | del dataiter 87 | print 'l2' 88 | print F 89 | 90 | print 'base feat predicted' 91 | query = np.loadtxt('/data3/matt/MARS/MARS-evaluation/info/query.csv', delimiter=',').astype(int) 92 | gallery = np.loadtxt('/data3/matt/MARS/MARS-evaluation/info/gallery.csv', delimiter=',').astype(int) 93 | 94 | cnts, cnt = [0, 0, 0, 0], [0] 95 | max_turn, tot_ava = args.sample_size, 1 96 | 97 | query[:, 2] -= 1 98 | gallery[:, 0] -= 1 99 | 100 | 101 | 102 | def get_data(a): 103 | return F[random.randrange(gallery[a, 0], gallery[a, 1])] 104 | 105 | max_turn, gc = 10, 0 106 | max_frames = max_turn 107 | cmc1 = np.zeros(max_frames) 108 | MAP = np.zeros(max_frames) 109 | results = [] 110 | 111 | def dist(a, b): 112 | diff = a - b 113 | return mx.nd.sum(diff*diff).asnumpy()[0] 114 | 115 | cur = mx.nd.zeros((gallery.shape[0], F.shape[1]),ctx=devices[0]) 116 | avgs = mx.nd.zeros((max_frames, gallery.shape[0], F.shape[1]),ctx=devices[0]) 117 | for k in xrange(max_frames): 118 | for a in xrange(gallery.shape[0]): 119 | j, camj = gallery[a, 2:] 120 | if j == -1 or gallery[a, 0] == gallery[a, 1]: 121 | continue 122 | cur[a] = get_data(a) 123 | if k == 0: 124 | avgs[k] = copyto(cur) 125 | else: 126 | avgs[k] = (avgs[k - 1] + cur) 127 | for k in xrange(1, max_frames): 128 | avgs[k] /= (k + 1) 129 | 130 | 131 | cnts_g, cnt_g = [0, 0, 0, 0], [0] 132 | 133 | for q in xrange(query.shape[0]): 134 | i, cam, idx = query[q] 135 | if gallery[idx, 0] == gallery[idx, 1]: 136 | continue 137 | scores = [] 138 | labels = [] 139 | d = [] 140 | 
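# for each frame budget k, compute squared L2 distances from the query
# tracklet's running-average feature to every gallery tracklet at once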
for k in xrange(max_frames): 141 | g = avgs[k] 142 | diff = g[idx] - g 143 | d.append(mx.nd.sum(diff*diff, axis=1).asnumpy()) 144 | for a in xrange(gallery.shape[0]): 145 | j, camj = gallery[a, 2:] 146 | if j == i and camj == cam or j == -1 or gallery[a, 0] == gallery[a, 1]: 147 | continue 148 | g = [] 149 | for k in xrange(max_frames): 150 | g.append(d[k][a]) 151 | scores.append(g) 152 | labels.append(1 if i == j else 0) 153 | 154 | scores = np.array(scores) 155 | for j in xrange(max_frames): 156 | MAP[j] += average_precision_score(labels, -scores[:, j]) 157 | a = np.argmin(scores[:, j]) 158 | if labels[a] == 1: 159 | cmc1[j] += 1 160 | gc += 1 161 | print q, i, cam, idx, gc 162 | print MAP[:10] / gc 163 | print cmc1[:10] / gc 164 | 165 | ''' 166 | avgs = [] 167 | for a in xrange(gallery.shape[0]): 168 | j, camj = gallery[a, 2:] 169 | if j == -1 or gallery[a, 0] == gallery[a, 1]: 170 | avgs.append(None) 171 | continue 172 | for k in xrange(gallery[a, 0], gallery[a, 1]): 173 | if k == gallery[a, 0]: 174 | avg_opp = copyto(F[k]) 175 | else: 176 | avg_opp += F[k] 177 | avg_opp /= gallery[a, 1] - gallery[a, 0] 178 | avgs.append(avg_opp) 179 | 180 | for q in xrange(query.shape[0]): 181 | if not q in pool: 182 | continue 183 | i, cam, idx = query[q] 184 | if gallery[idx, 0] == gallery[idx, 1]: 185 | continue 186 | scores = [] 187 | label = [] 188 | d = [] 189 | for k in xrange(gallery[idx, 0], gallery[idx, 1]): 190 | if k == gallery[idx, 0]: 191 | avg_cur = copyto(F[k]) 192 | else: 193 | avg_cur += F[k] 194 | avg_cur /= gallery[idx, 1] - gallery[idx, 0] 195 | for a in xrange(gallery.shape[0]): 196 | j, camj = gallery[a, 2:] 197 | if j == i and camj == cam or j == -1 or gallery[a, 0] == gallery[a, 1]: 198 | continue 199 | d.append((dist(avg_cur, avgs[a]), j)) 200 | 201 | d = sorted(d) 202 | cur = 0 203 | for a in xrange(len(d)): 204 | j = d[a][1] 205 | if j == i: 206 | break 207 | else: 208 | cur += 1 209 | update_cnts(cur, cnts, cnt) 210 | print q, i, cam, idx, cur, norm_cnts(cnts, cnt) 211 | ''' 212 | -------------------------------------------------------------------------------- /RL/symbols.py: -------------------------------------------------------------------------------- 1 | import find_mxnet 2 | import mxnet as mx 3 | import importlib 4 | 5 | from rnn_models import get_gru_cell, get_lstm_cell, lstm, gru 6 | 7 | BN_EPS = 1e-5+1e-10 8 | 9 | 10 | def sym_base_net(network, fix_gamma=False, is_train=False, global_stats=False, is_test=False): 11 | ''' 12 | network structure 13 | ''' 14 | if is_test: 15 | data = mx.symbol.Variable(name="data") 16 | else: 17 | data1 = mx.symbol.Variable(name="data1") 18 | data2 = mx.symbol.Variable(name="data2") 19 | data = mx.sym.Concat(*[data1, data2], dim=0, name='data') 20 | symbol = importlib.import_module('symbol_' + network).get_symbol(data, fix_gamma=fix_gamma, global_stats=global_stats) 21 | pooling = mx.symbol.Pooling( 22 | data=symbol, kernel=(1, 1), global_pool=True, 23 | pool_type='avg', name='global_pool') 24 | flatten = mx.symbol.Flatten(data=pooling, name='flatten') 25 | if is_test: 26 | l2 = mx.sym.L2Normalization(flatten) 27 | return l2 28 | else: 29 | split_flatten = mx.sym.SliceChannel(flatten, num_outputs=2, axis=0) 30 | return split_flatten[0], split_flatten[1] 31 | return None 32 | 33 | 34 | def fusion_layer(data, num_hidden, num_layers, name, l2=False, weights=[], bias=[]): 35 | org_data = data 36 | for i in xrange(num_layers): 37 | data = mx.sym.FullyConnected(data=data, num_hidden=num_hidden, name='%s%d'%(name,i), weight=weights[i], 
bias=bias[i]) 38 | if i == 0: 39 | first_layer = data 40 | elif i == num_layers - 1: 41 | continue 42 | data = mx.sym.Activation(data=data, act_type='relu', name='%srelu%d'%(name,i)) 43 | if l2: 44 | return mx.sym.Concat(*[mx.sym.L2Normalization(first_layer), mx.sym.L2Normalization(data)], dim=1) 45 | return mx.sym.Concat(*[org_data, data], dim=1) 46 | 47 | 48 | def get_dist_sym(a, b): 49 | diff = a - b 50 | return mx.sym.sum(diff*diff, axis=1, keepdims=1) 51 | 52 | 53 | def sym_DQN(data1, data2, num_sim, num_hidden, min_states, min_imgs, num_acts=4, fusion=False, is_train=False, bn=False, l2_norm=False, global_stats=False, no_his=True, debug=False, maxout=False, cls=False): 54 | #data1 = mx.sym.Dropout(data1) 55 | #data2 = mx.sym.Dropout(data2) 56 | 57 | featmaps = [mx.sym.SliceChannel(mx.sym.L2Normalization(data1), num_outputs=min_states, axis=0), 58 | mx.sym.SliceChannel(mx.sym.L2Normalization(data2), num_outputs=min_states, axis=0)] 59 | gs = featmaps 60 | ds, ts = [], [] 61 | for i in xrange(min_states): 62 | d, t = [], [] 63 | d.append(get_dist_sym(featmaps[0][i], featmaps[1][i])) 64 | t.append(mx.sym.sum(featmaps[0][i] * featmaps[1][i], axis=1, keepdims=1)) 65 | for j in xrange(i): 66 | d.append(get_dist_sym(featmaps[0][i], featmaps[1][j])) 67 | d.append(get_dist_sym(featmaps[0][j], featmaps[1][i])) 68 | t.append(mx.sym.sum(featmaps[0][i] * featmaps[1][j], axis=1, keepdims=1)) 69 | t.append(mx.sym.sum(featmaps[0][j] * featmaps[1][i], axis=1, keepdims=1)) 70 | ds.append(d) 71 | ts.append(t) 72 | print i, len(d) 73 | 74 | 75 | featmap = mx.sym.abs(mx.sym.L2Normalization(data1) - mx.sym.L2Normalization(data2)) 76 | featmaps = mx.sym.SliceChannel(featmap, num_outputs=min_states, axis=0, name='featmaps') 77 | W1, W2, W3 = mx.symbol.Variable(name="fc1_weight"), mx.symbol.Variable(name="fc2_weight"), mx.symbol.Variable(name="Qv_weight") 78 | b1, b2, b3 = mx.symbol.Variable(name="fc1_bias"), mx.symbol.Variable(name="fc2_bias"), mx.symbol.Variable(name="Qv_bias") 79 | if fusion: 80 | Wfus = [[mx.symbol.Variable(name="fus%d-%d_weight"%(i,j)) for j in xrange(2)] for i in xrange(3)] 81 | bfus = [[mx.symbol.Variable(name="fus%d-%d_bias"%(i,j)) for j in xrange(2)] for i in xrange(3)] 82 | ############# mini batch ################### 83 | ret, unsures, atts = [], [], [] 84 | if True: 85 | for i in xrange(min_states): 86 | if i == 0: 87 | tmin, tmax, tsum = ts[0][0], ts[0][0], ts[0][0] 88 | dmin, dmax, dsum = ds[0][0], ds[0][0], ds[0][0] 89 | else: 90 | for j in xrange(len(ts[i])): 91 | tmin = mx.sym.minimum(tmin, ts[i][j]) 92 | tmax = mx.sym.maximum(tmax, ts[i][j]) 93 | tsum = tsum + ts[i][j] 94 | for j in xrange(len(ds[i])): 95 | dmin = mx.sym.minimum(dmin, ds[i][j]) 96 | dmax = mx.sym.maximum(dmax, ds[i][j]) 97 | dsum = dsum + ds[i][j] 98 | if i <= 1: 99 | agg = featmaps[0] 100 | feat = mx.sym.Concat(*[tmin, tmax, tsum / ((i + 1) * (i + 1)), dmin, dmax, dsum / ((i + 1) * (i + 1)),], dim=1) 101 | else: 102 | print i, len(unsures), len(ret) 103 | g1 = mx.sym.broadcast_mul(gs[0][0], unsures[0]) 104 | g2 = mx.sym.broadcast_mul(gs[1][0], unsures[0]) 105 | wsum = unsures[0] 106 | for j in xrange(1, i): 107 | g1 = g1 + mx.sym.broadcast_mul(gs[0][j], unsures[j]) 108 | g2 = g2 + mx.sym.broadcast_mul(gs[1][j], unsures[j]) 109 | wsum = wsum + unsures[j] 110 | g1 = mx.sym.broadcast_div(g1, wsum) 111 | g2 = mx.sym.broadcast_div(g2, wsum) 112 | agg = mx.sym.abs(g1 - g2) 113 | feat = mx.sym.Concat(*[tmin, tmax, tsum / ((i + 1) * (i + 1)), dmin, dmax, dsum / ((i + 1) * (i + 1)),], dim=1) 114 | 115 | fm = 
featmaps[i] 116 | if fusion: 117 | print 'mini batch fusion on' 118 | agg = fusion_layer(agg, num_hidden, 2, 'fus_featmap1', weights=[Wfus[0][0], Wfus[0][1]], bias=[bfus[0][0], bfus[0][1]])#, l2=True) 119 | fm = fusion_layer(fm, num_hidden, 2, 'fus_featmap2', weights=[Wfus[1][0], Wfus[1][1]], bias=[bfus[1][0], bfus[1][1]])#, l2=True) 120 | feat = fusion_layer(feat, 32, 2, 'fus_feat', weights=[Wfus[2][0], Wfus[2][1]], bias=[bfus[2][0], bfus[2][1]]) 121 | diff = mx.sym.Concat(*[agg, fm, feat], dim=1, name='diff%d'%i) 122 | #diff = mx.sym.Concat(*[agg, fm], dim=1, name='diff%d'%i) 123 | sir_fc1 = mx.sym.FullyConnected(data=diff, num_hidden=num_hidden,name='sirfc1-%d'%i, weight=W1, bias=b1) 124 | sir_relu1 = mx.sym.Activation(data=sir_fc1, act_type='relu', name='sirrl1-%d'%i) 125 | sir_fc2 = mx.sym.FullyConnected(data=sir_relu1, num_hidden=num_hidden,name='sirfc2-%d'%i, weight=W2, bias=b2) 126 | sir_relu2 = mx.sym.Activation(data=sir_fc2, act_type='relu', name='sirrl2-%d'%i) 127 | Q = mx.sym.FullyConnected(data=sir_relu2, num_hidden=num_acts,name='Qvalue-%d'%i, weight=W3, bias=b3) 128 | qsm = mx.sym.SoftmaxActivation(Q) 129 | qsm = mx.sym.BlockGrad(qsm) 130 | ret.append(Q) 131 | atts.append(qsm) 132 | if i + 1 < min_states: 133 | Q_sliced = mx.sym.SliceChannel(qsm, num_outputs=3, axis=1) 134 | unsures.append(1.0 - Q_sliced[2]) 135 | 136 | Qvalue = mx.sym.Concat(*ret, dim=0, name='Qvalues') 137 | attss = mx.sym.Concat(*atts, dim=0, name='atts') 138 | return mx.sym.Group([Qvalue, attss]) 139 | 140 | 141 | if __name__ == '__main__': 142 | #sym = build_base_net('inception-bn') 143 | #mx.viz.print_summary(sym, {'data': (1, 3, 128, 64)}) 144 | sym = sym_DQN(128, 128, num_acts=3, min_states=2, min_imgs=4, fusion=True) 145 | mx.viz.print_summary(sym, {'data1': (2, 1024), 'data2': (2, 1024)}) 146 | -------------------------------------------------------------------------------- /baseline/calc_cmc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | import argparse 4 | import munkres 5 | import hungarian 6 | import random 7 | import sys 8 | 9 | 10 | cmcs = [1, 5, 10, 20] 11 | cn = 4 12 | single_sample_times, sample_rounds = 100, 10 13 | match_points = 100 14 | 15 | def norm_cnts(cnts, cnt): 16 | return [cnts[i] / cnt[0] for i in xrange(cn)] 17 | 18 | def update_cnts(d, cnts, cnt, N, i): 19 | r = np.argsort(d) 20 | cur = -1 21 | for j in xrange(N): 22 | if r[j] == i: 23 | cur = j 24 | break 25 | for j in xrange(cn): 26 | if cur < cmcs[j]: 27 | cnts[j] += 1.0 28 | cnt[0] += 1.0 29 | print cur, norm_cnts(cnts, cnt) 30 | 31 | def pooling_method(f, N): 32 | cam0 = [] 33 | for i in xrange(N): 34 | p = np.zeros(1024) 35 | #p = np.full(1024, -1e100) 36 | for a in xrange(cnt_lst[i], cnt_lst[i + 1]): 37 | p += f[a] 38 | #p = np.maximum(p, f[a]) 39 | p /= (cnt_lst[i + 1] - cnt_lst[i]) 40 | cam0.append(p) 41 | cam1 = [] 42 | for i in xrange(N): 43 | p = np.zeros(1024) 44 | #p = np.full(1024, -1e100) 45 | for a in xrange(cnt_lst[i + N], cnt_lst[i + N + 1]): 46 | p += f[a] 47 | #p = np.maximum(p, f[a]) 48 | p /= (cnt_lst[i + N + 1] - cnt_lst[i + N]) 49 | cam1.append(p) 50 | 51 | cam0, cam1 = np.array(cam0), np.array(cam1) 52 | for i in xrange(1024): 53 | norm = (cam0[:, i] * cam0[:, i]).sum() + (cam1[:, i] * cam1[:, i]).sum() 54 | norm = math.sqrt(norm) 55 | cam0[:, i] /= norm 56 | cam1[:, i] /= norm 57 | 58 | cnts, cnt = [0, 0, 0, 0], [0] 59 | for i in xrange(N): 60 | d = np.zeros(N) 61 | for j in xrange(N): 62 | t = (cam0[i] - cam1[j]) 63 | 
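# squared Euclidean distance between the pooled camera-0 descriptor of
# identity i and the pooled camera-1 descriptor of identity j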
d[j] += (t * t).sum() 64 | update_cnts(d, cnts, cnt, N, i) 65 | 66 | for i in xrange(N): 67 | d = np.zeros(N) 68 | for j in xrange(N): 69 | t = (cam1[i] - cam0[j]) 70 | d[j] += (t * t).sum() 71 | update_cnts(d, cnts, cnt, N, i) 72 | 73 | print 'pooling method', norm_cnts(cnts, cnt) 74 | 75 | def calc_mean(d): 76 | ret = np.zeros(len(d)) 77 | for t in xrange(sample_rounds): 78 | for i in xrange(len(d)): 79 | x = 0.0 80 | for k in xrange(single_sample_times): 81 | a = random.randint(0, d[i].shape[0] - 1) 82 | b = random.randint(0, d[i].shape[1] - 1) 83 | x += d[i][a][b] 84 | x /= single_sample_times 85 | ret[i] += x 86 | return ret 87 | 88 | def calc_median(d): 89 | ret = np.zeros(len(d)) 90 | for t in xrange(sample_rounds): 91 | for i in xrange(len(d)): 92 | x = [] 93 | for k in xrange(single_sample_times): 94 | a = random.randint(0, d[i].shape[0] - 1) 95 | b = random.randint(0, d[i].shape[1] - 1) 96 | x.append(d[i][a][b]) 97 | x = sorted(x) 98 | ret[i] += x[single_sample_times / 2] + x[single_sample_times / 2 + 1] 99 | return ret 100 | 101 | def calc_min(d): 102 | ret = np.zeros(len(d)) 103 | for t in xrange(sample_rounds): 104 | for i in xrange(len(d)): 105 | x = 1e100 106 | for k in xrange(single_sample_times): 107 | a = random.randint(0, d[i].shape[0] - 1) 108 | b = random.randint(0, d[i].shape[1] - 1) 109 | x = min(x, d[i][a][b]) 110 | ret[i] += x 111 | return ret 112 | 113 | def calc_match(d): 114 | ret = np.zeros(len(d)) 115 | for t in xrange(sample_rounds): 116 | for i in xrange(len(d)): 117 | choices_a = [random.randint(0, d[i].shape[0] - 1) for _ in xrange(match_points)] 118 | choices_b = [random.randint(0, d[i].shape[1] - 1) for _ in xrange(match_points)] 119 | mat = d[i][choices_a] 120 | mat = (mat.T)[choices_b] 121 | am = np.array(mat) 122 | match = hungarian.lap(am)[0] 123 | #M = munkres.Munkres() 124 | #match = M.compute(am) 125 | x = 0.0 126 | #g = [] 127 | for p in xrange(len(match)): 128 | #for p in match: 129 | x += mat[i][match[i]] 130 | #g.append(mat[i][match[i]]) 131 | #x += mat[p[0]][p[1]] 132 | #g.sort() 133 | #ret[i] += g[len(g) / 2] if len(g) % 2 == 1 else (g[len(g) / 2] + g[len(g) / 2 - 1]) * 0.5 134 | ret[i] += x 135 | return ret 136 | 137 | def calc_order(d, rerank=False): 138 | ret = np.zeros(len(d)) 139 | t = 10000 140 | for i in xrange(len(d)): 141 | t = min(d[i].shape[0], t) 142 | t = min(t, d[i].shape[1]) 143 | for i in xrange(len(d)): 144 | if rerank: 145 | pass 146 | else: 147 | tp = min(d[i].shape[0], d[i].shape[1]) 148 | choices_a = xrange(tp) 149 | choices_b = xrange(tp) 150 | mat = d[i][choices_a] 151 | mat = (mat.T)[choices_b] 152 | am = np.array(mat) 153 | M = munkres.Munkres() 154 | #print mat.shape 155 | match = M.compute(am) 156 | match = sorted(match) 157 | g = [mat[match[0][0]][match[0][1]]] 158 | for p in xrange(1, len(match)): 159 | g.append(mat[match[p][0]][match[p][1]]) 160 | #for q in xrange(p - 1): 161 | #if match[p][1] > match[q][1]: 162 | # ret[i] += 1 163 | g = sorted(g) 164 | for p in xrange(t): 165 | ret[i] += g[p] 166 | print len(match), ret[i] 167 | #print ret 168 | return ret 169 | 170 | def other_method(f, N): 171 | cnts_median, cnt_median = [0, 0, 0, 0], [0] 172 | cnts_mean, cnt_mean = [0, 0, 0, 0], [0] 173 | cnts_min, cnt_min = [0, 0, 0, 0], [0] 174 | cnts_match, cnt_match = [0, 0, 0, 0], [0] 175 | cnts_order, cnt_order = [0, 0, 0, 0], [0] 176 | for i in xrange(N): 177 | d, na = [], cnt_lst[N + i + 1] - cnt_lst[N + i] 178 | for j in xrange(N): 179 | nb = cnt_lst[j + 1] - cnt_lst[j] 180 | t = np.zeros((nb, na)) 181 | for b in 
xrange(cnt_lst[j], cnt_lst[j + 1]): 182 | for a in xrange(cnt_lst[N + i], cnt_lst[N + i + 1]): 183 | g = f[a] - f[b] 184 | t[b - cnt_lst[j], a - cnt_lst[N + i]] = (g * g).sum() 185 | d.append(t) 186 | print 'cam0', i 187 | update_cnts(calc_mean(d), cnts_mean, cnt_mean, N, i) 188 | update_cnts(calc_median(d), cnts_median, cnt_median, N, i) 189 | update_cnts(calc_min(d), cnts_min, cnt_min, N, i) 190 | update_cnts(calc_match(d), cnts_match, cnt_match, N, i) 191 | #update_cnts(calc_order(d), cnts_order, cnt_order, N, i) 192 | sys.stdout.flush() 193 | for i in xrange(N): 194 | d, na = [], cnt_lst[i + 1] - cnt_lst[i] 195 | for j in xrange(N): 196 | nb = cnt_lst[N + j + 1] - cnt_lst[N + j] 197 | t = np.zeros((nb, na)) 198 | for b in xrange(cnt_lst[N + j], cnt_lst[N + j + 1]): 199 | for a in xrange(cnt_lst[i], cnt_lst[i + 1]): 200 | g = f[a] - f[b] 201 | t[b - cnt_lst[N + j], a - cnt_lst[i]] = (g * g).sum() 202 | d.append(t) 203 | print 'cam1', i 204 | update_cnts(calc_mean(d), cnts_mean, cnt_mean, N, i) 205 | update_cnts(calc_median(d), cnts_median, cnt_median, N, i) 206 | update_cnts(calc_min(d), cnts_min, cnt_min, N, i) 207 | update_cnts(calc_match(d), cnts_match, cnt_match, N, i) 208 | #update_cnts(calc_order(d), cnts_order, cnt_order, N, i) 209 | sys.stdout.flush() 210 | print 'min', norm_cnts(cnts_min, cnt_min) 211 | print 'mean', norm_cnts(cnts_mean, cnt_mean) 212 | print 'median', norm_cnts(cnts_median, cnt_median) 213 | print 'match', norm_cnts(cnts_match, cnt_match) 214 | sys.stdout.flush() 215 | 216 | def parse_args(): 217 | parser = argparse.ArgumentParser( 218 | description='Calc CMC Rank for ilds&prid dataset') 219 | parser.add_argument('--data', type=str, 220 | default='features/image_test-prid_baseline_b4.csv', 221 | help='data path') 222 | parser.add_argument('--list', type=str, 223 | #default='/data3/matt/iLIDS-VID/recs/image_test.txt', 224 | default='/data3/matt/prid_2011/recs/image_test.txt', 225 | help='list path') 226 | return parser.parse_args() 227 | 228 | args = parse_args() 229 | print args 230 | 231 | f, cnt_lst = np.loadtxt(args.data), np.loadtxt(args.list).astype(int) 232 | N = cnt_lst.shape[0] / 2 233 | for i in xrange(N): 234 | for a in xrange(cnt_lst[i] + 1, cnt_lst[i + 1]): 235 | f[a] += f[a - 1] 236 | for a in xrange(cnt_lst[i] + 1, cnt_lst[i + 1]): 237 | f[a] /= a - cnt_lst[i] + 1 238 | for i in xrange(N): 239 | for a in xrange(cnt_lst[N + i] + 1, cnt_lst[N + i + 1]): 240 | f[a] += f[a - 1] 241 | for a in xrange(cnt_lst[N + i] + 1, cnt_lst[N + i + 1]): 242 | f[a] /= a - cnt_lst[N + i] + 1 243 | 244 | pooling_method(f, N) 245 | other_method(f, N) 246 | 247 | -------------------------------------------------------------------------------- /RL/find_eg.py: -------------------------------------------------------------------------------- 1 | import sys 2 | #sys.path.insert(0, "mxnet/python/") 3 | import find_mxnet 4 | import mxnet as mx 5 | 6 | import numpy as np 7 | import argparse 8 | from sklearn.metrics import average_precision_score 9 | 10 | from batch_provider import BatchProvider 11 | from utils import get_imRecordIter, load_checkpoint 12 | from agent import sym_base_net, wash, get_Qvalue, create_moduleQ 13 | 14 | import cv2, os 15 | 16 | import matplotlib as mpl 17 | import matplotlib.pyplot as plt 18 | 19 | mpl.rcParams['lines.linewidth'] = 1.5 20 | mpl.rcParams['savefig.dpi'] = 300 21 | mpl.rcParams['ps.useafm'] = True 22 | mpl.rcParams['pdf.use14corefonts'] = True 23 | mpl.rcParams['text.usetex'] = True 24 | mpl.rcParams['font.size'] = 24 25 | 
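# The remaining matplotlib defaults below style the per-pair Q-value curves
# that plot() saves as PDFs further down in this script.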
mpl.rcParams['font.family'] = "Times New Roman" 26 | mpl.rcParams['legend.fontsize'] = "small" 27 | mpl.rcParams['legend.fancybox'] = True 28 | mpl.rcParams['lines.markersize'] = 10 29 | mpl.rcParams['figure.figsize'] = 9, 6.3 30 | mpl.rcParams['legend.labelspacing'] = 0.1 31 | mpl.rcParams['legend.borderpad'] = 0.1 32 | mpl.rcParams['legend.borderaxespad'] = 0.2 33 | mpl.rcParams['font.monospace'] = "Courier 10 Pitch" 34 | mpl.rcParams['text.latex.preamble'] = [r'\boldmath'] 35 | 36 | 37 | def plot(Q, terminal, name): 38 | t = [1+_ for _ in xrange(terminal)] 39 | a = [Q[_, 1] for _ in xrange(terminal)] 40 | b = [Q[_, 0] for _ in xrange(terminal)] 41 | c = [Q[_, 2] for _ in xrange(terminal)] 42 | 43 | plt.figure(figsize=(10,9)) 44 | ax = plt.gca() 45 | 46 | plt.plot(t, a, 47 | marker='o', 48 | markersize=12, 49 | markerfacecolor=(0, 1, 0, 0.5), 50 | color='g', 51 | label='same', 52 | alpha=0.5, 53 | ) 54 | 55 | plt.plot(t, b, 56 | marker='x', 57 | markersize=12, 58 | markerfacecolor=(1, 0, 0, 0.5), 59 | color='r', 60 | label='different', 61 | alpha=0.5, 62 | ) 63 | 64 | plt.plot(t, c, 65 | marker='^', 66 | markersize=12, 67 | markerfacecolor=(1, 1, 0, 0.5), 68 | color='y', 69 | label='unsure', 70 | alpha=0.5, 71 | ) 72 | 73 | plt.ylabel(r'\textbf{Q-Value}') 74 | plt.xlabel(r'\textbf{\#. Time Steps}') 75 | plt.grid(linestyle=':') 76 | plt.savefig('%s.pdf'%name) 77 | 78 | 79 | def parse_args(): 80 | parser = argparse.ArgumentParser( 81 | description='single domain car recog training') 82 | parser.add_argument('--gpus', type=str, default='2', 83 | help='the gpus will be used, e.g "0,1"') 84 | parser.add_argument('--model-load-epoch', type=int, default=3, 85 | help='load the model on an epoch using the model-load-prefix') 86 | parser.add_argument('--model-load-prefix', type=str, default='ilds-TEST-DQN_test-1-2017.11.14-23.56.43-bs4-ss8-incp_prep__nobg_noregQv_block2_f2_nofus0-2_poscontra_fne0.1-1-1_tisr1-sgd_t500-_qg0.9-up0.2-vtd4.0-_lr1e1-_32-1024-_na3-3', 87 | help='load model prefix') 88 | parser.add_argument('--batch-size', type=int, default=1, 89 | help='the batch size') 90 | parser.add_argument('--boost-times', type=int, default=1, 91 | help='boosting times to increase robustness') 92 | return parser.parse_args() 93 | 94 | 95 | def get_train_args(name): 96 | fn = open('log/%s.log'%name) 97 | s = fn.readline()[10:] 98 | fn.close() 99 | s = 'ret=argparse.' 
+ s 100 | exec(s) 101 | return ret 102 | 103 | test_args = parse_args() 104 | print 'test arg:', test_args 105 | devices = [mx.gpu(int(i)) for i in test_args.gpus.split(',')] 106 | args = get_train_args(test_args.model_load_prefix) 107 | print 'train arg:', args 108 | 109 | batch_size = args.batch_size 110 | num_epoch = args.num_epoches 111 | 112 | arg_params, aux_params = load_checkpoint('models/%s' % test_args.model_load_prefix, test_args.model_load_epoch) 113 | data1, data2 = sym_base_net(args.network, is_train=args.e2e, global_stats=True) 114 | Q = create_moduleQ(data1, data2, devices, args.sample_size, args.num_sim, args.num_hidden, args.num_acts, args.min_states, args.min_imgs, fusion=args.fusion, is_train=True, nh=not args.history, is_e2e=args.e2e, bn=args.q_bn) 115 | Q.init_params(initializer=None, 116 | arg_params=arg_params, 117 | aux_params=aux_params, 118 | allow_missing=False, 119 | force_init=True) 120 | 121 | 122 | valid_iter = get_imRecordIter( 123 | args, 'recs/%s'%args.valid_set, (3, 224, 112), 1, 124 | shuffle=False, aug=False, even_iter=True) 125 | train_iter = get_imRecordIter( 126 | args, 'recs/%s'%args.train_set, (3, 224, 112), 1, 127 | shuffle=False, aug=True, even_iter=True) 128 | 129 | valid_lst = np.loadtxt('%s/recs/%s.txt'%(args.data_dir, args.valid_set)).astype(int) 130 | train_lst = np.loadtxt('%s/recs/%s.txt'%(args.data_dir, args.train_set)).astype(int) 131 | 132 | valid = BatchProvider(valid_iter, valid_lst, False, args.sample_size, sample_ratio=0.5, is_valid=True, need_feat=args.history) 133 | train = BatchProvider(train_iter, train_lst, True, args.sample_size, sample_ratio=0.5, need_feat=args.history) 134 | N = args.num_id 135 | 136 | cmcs, ap, cmcn, vscores, vturns = [[], [], [], []], [], [1, 5, 10, 20], [], [] 137 | max_penalty=1 138 | 139 | 140 | def tocv2(im): 141 | newim = np.zeros_like(im) 142 | newim[0] = im[2] 143 | newim[1] = im[1] 144 | newim[2] = im[0] 145 | newim = np.transpose(newim, (1, 2, 0)) + 128 146 | newim = newim.astype(np.uint8) 147 | print newim 148 | return newim 149 | 150 | valid.reset() 151 | batch, valid_cnt, vv, vpool = 0, 0, np.zeros((N*2, N)), set() 152 | vs, vt = [0 for i in xrange(N+N)], [0 for i in xrange(N+N)] 153 | fts = [[0 for _2 in xrange(N)] for _1 in xrange(N)] 154 | fdir = 'plots/%s'%args.mode 155 | os.system('mkdir %s'%fdir) 156 | for i in xrange(args.sample_size + 1): 157 | os.system('mkdir %s/%d'%(fdir,i)) 158 | for j in xrange(4): 159 | os.system('mkdir %s/%d/%d'%(fdir,i,j)) 160 | 161 | while valid_cnt < N*N: 162 | batch += 1 163 | cur, a, b = valid.provide() 164 | y = ((a %N) == (b % N)) 165 | data_batch = wash(cur, devices[0]) 166 | Qvalue = get_Qvalue(Q, data_batch, is_train=False) 167 | print Q.get_outputs()[1].asnumpy() 168 | print Qvalue 169 | i = 0 170 | while i < args.sample_size: 171 | if args.total_forward: 172 | if i + 1 < args.sample_size: 173 | k = 2 174 | else: 175 | k = np.argmax(Qvalue[i, :2]) 176 | else: 177 | k = np.argmax(Qvalue[i]) 178 | cls = k % args.acts_per_round 179 | step = k - 1 180 | if cls >= 2: 181 | if i + step >= args.sample_size: 182 | r = -max_penalty 183 | terminal = True 184 | else: 185 | r = -args.penalty * (2.0 - (0.5 ** (step - 1))) 186 | terminal = False 187 | else: 188 | r = 1 if cls == y else -max_penalty 189 | terminal = True 190 | if args.pos_weight > 1: 191 | if y: 192 | r *= args.pos_weight 193 | else: 194 | if not y: 195 | r /= args.pos_weight 196 | print 'valid', i, (a, b), Qvalue[i], k, (y, cls), r 197 | va, vb = a, b % N 198 | if (va, vb) not in vpool: 199 | 
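# Evaluation book-keeping: vs accumulates per-identity reward, vpool marks
# (probe, gallery) pairs that already reached a terminal decision, and vv
# stores the Q-value margin Q[different] - Q[same], which is later ranked
# to compute CMC and average precision.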
vs[va] += r 200 | vs[vb+N] += r 201 | if terminal: 202 | if (va, vb) not in vpool: 203 | fts[va][vb] = (k + (3 if va == vb else 0), i) 204 | vpool.add((va, vb)) 205 | valid_cnt += 1 206 | vv[va][vb] = Qvalue[i][0] - Qvalue[i][1] 207 | vt[va] += i + 1 208 | vv[vb+N][va] += vv[va][vb] 209 | vt[vb+N] += i + 1 210 | print va, vb, vv[va][vb], r 211 | if terminal and r == 1 and i > 0: 212 | img = np.zeros((3, cur[0][0].shape[1] * 2, cur[0][0].shape[2] * args.sample_size)) 213 | print va, vb, i 214 | for j in xrange(args.sample_size): 215 | img[:, :cur[0][j].shape[1], cur[0][j].shape[2]*j:cur[0][j].shape[2]*(j+1)] = cur[0][j] 216 | img[:, cur[0][j].shape[1]:, cur[1][j].shape[2]*j:cur[1][j].shape[2]*(j+1)] = cur[1][j] 217 | name = '%s/%d/%d/%d-%d'%(fdir, i if cls < 2 else args.sample_size, ((2 if y else 0) + (1 if Qvalue[i, 1] > Qvalue[i, 0] else 0)), va, vb) 218 | cv2.imwrite('%s.png'%(name), tocv2(img)) 219 | np.savetxt('%s.txt'%name, Qvalue) 220 | plot(Qvalue, i + 1, name) 221 | if terminal: 222 | break 223 | i += step 224 | for i in xrange(N*2): 225 | a, r = i % N, 0 226 | for b in xrange(N): 227 | if a != b and vv[i][b] <= vv[i][a]: 228 | r += 1 229 | for k in xrange(4): 230 | cmcs[k].append(1.0 if r < cmcn[k] else 0.0) 231 | vscores += [vs[i]] 232 | vturns += [vt[i]] 233 | score = np.array([-vv[i][_] for _ in xrange(N)]) 234 | label = np.array([(1 if _ == a else 0) for _ in xrange(N)]) 235 | ap.append(average_precision_score(label, score)) 236 | print 'ap', i, ap[-1] 237 | cnt_map = [[0 for j in xrange(6)] for i in xrange(args.sample_size)] 238 | for i in xrange(N): 239 | for j in xrange(N): 240 | cnt_map[fts[i][j][1]][fts[i][j][0]] += 1 241 | 242 | -------------------------------------------------------------------------------- /baseline/baseline.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, "mxnet/python/") 3 | import find_mxnet 4 | import mxnet as mx 5 | import logging 6 | import numpy as np 7 | import argparse 8 | from mxnet.optimizer import SGD 9 | import loss_layers 10 | import lsoftmax 11 | from verifi_iterator import verifi_iterator 12 | from even_iterator import Even_iterator 13 | import importlib 14 | 15 | 16 | def build_network(symbol, num_id, batchsize): 17 | ''' 18 | network structure 19 | ''' 20 | # concat = internals["ch_concat_5b_chconcat_output"] 21 | pooling = mx.symbol.Pooling( 22 | data=symbol, kernel=(1, 1), global_pool=True, 23 | pool_type='avg', name='global_pool') 24 | flatten = mx.symbol.Flatten(data=pooling, name='flatten') 25 | l2 = mx.symbol.L2Normalization(data=flatten, name='l2_norm') 26 | dropout = l2#mx.symbol.Dropout(data=l2, name='dropout1') 27 | 28 | if args.lsoftmax: 29 | #fc1 = mx.symbol.Custom(data=flatten, num_hidden=num_id, beta=1000, margin=3, scale=0.9999, beta_min=1, op_type='LSoftmax', name='lsoftmax') 30 | fc1 = mx.symbol.LSoftmax(data=flatten, num_hidden=num_id, beta=1000, margin=4, scale=0.99999, beta_min=3, name='lsoftmax') 31 | else: 32 | fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=num_id, name='cls_fc1') 33 | 34 | softmax1 = mx.symbol.SoftmaxOutput(data=fc1, name='softmax') 35 | 36 | outputs = [softmax1] 37 | if args.verifi: 38 | verifi = mx.symbol.Custom(data=dropout, grad_scale=1.0, threshd=args.verifi_threshd, op_type='verifiLoss', name='verifi') 39 | outputs.append(verifi) 40 | 41 | if args.triplet: 42 | triplet = mx.symbol.Custom(data=dropout, grad_scale=1.0, threshd=args.triplet_threshd, op_type='tripletLoss', name='triplet') 43 | 
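# Every enabled auxiliary loss is appended as an extra symbol output so that
# Multi_Metric below can report it next to the softmax classification accuracy.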
outputs.append(triplet) 44 | 45 | if args.lmnn: 46 | lmnn = mx.symbol.Custom(data=dropout, epsilon=0.1, threshd=0.9, op_type='lmnnLoss', name='lmnn') 47 | outputs.append(lmnn) 48 | 49 | if args.center: 50 | center = mx.symbol.Custom(data=dropout, op_type='centerLoss', name='center', num_class=num_id, alpha=0.5, scale=1.0, batchsize=batchsize) 51 | outputs.append(center) 52 | 53 | return mx.symbol.Group(outputs) 54 | 55 | 56 | class Multi_Metric(mx.metric.EvalMetric): 57 | """Calculate accuracies of multi label""" 58 | def __init__(self, num=None, cls=1): 59 | super(Multi_Metric, self).__init__('multi-metric', num) 60 | self.cls = cls 61 | 62 | def update(self, labels, preds): 63 | # mx.metric.check_label_shapes(labels, preds) 64 | # classification loss 65 | for i in range(self.cls): 66 | pred_label = mx.nd.argmax_channel(preds[i]) 67 | pred_label = pred_label.asnumpy().astype('int32') 68 | label = labels[i].asnumpy().astype('int32') 69 | 70 | mx.metric.check_label_shapes(label, pred_label) 71 | 72 | if self.num is None: 73 | self.sum_metric += (pred_label.flat == label.flat).sum() 74 | self.num_inst += len(pred_label.flat) 75 | else: 76 | self.sum_metric[i] += (pred_label.flat == label.flat).sum() 77 | self.num_inst[i] += len(pred_label.flat) 78 | 79 | # verification losses 80 | for i in range(self.cls, len(preds)): 81 | pred = preds[i].asnumpy() 82 | if self.num is None: 83 | self.sum_metric += np.sum(pred) 84 | self.num_inst += len(pred) 85 | else: 86 | self.sum_metric[i] += np.sum(pred) 87 | self.num_inst[i] += len(pred) 88 | 89 | def get_imRecordIter(name, input_shape, batch_size, kv, shuffle=False, aug=False, even_iter=False): 90 | ''' 91 | get iterator use even_iterator or ImageRecordIter 92 | ''' 93 | if even_iter: 94 | aug_params = {} 95 | aug_params['resize'] = 128 96 | aug_params['rand_crop'] = aug 97 | aug_params['rand_mirror'] = aug 98 | aug_params['input_shape'] = input_shape 99 | aug_params['mean'] = 128.0 100 | 101 | dataiter = Even_iterator( 102 | '%s/%s.lst' % (args.data_dir, name), 103 | batch_size=batch_size / 2, 104 | aug_params=aug_params, 105 | shuffle=shuffle) 106 | else: 107 | dataiter = mx.io.ImageRecordIter( 108 | path_imglist="%s/%s.lst" % (args.data_dir, name), 109 | path_imgrec="%s/%s.rec" % (args.data_dir, name), 110 | # mean_img="models/market_mean.bin", 111 | mean_r=128.0, 112 | mean_g=128.0, 113 | mean_b=128.0, 114 | rand_crop=aug, 115 | rand_mirror=aug, 116 | prefetch_buffer=4, 117 | preprocess_threads=3, 118 | shuffle=shuffle, 119 | label_width=1, 120 | round_batch=False, 121 | data_shape=input_shape, 122 | batch_size=batch_size / 2, 123 | num_parts=kv.num_workers, 124 | part_index=kv.rank) 125 | 126 | return dataiter 127 | 128 | 129 | def get_iterators(batch_size, input_shape, train, test, kv, gpus=1): 130 | ''' 131 | use image lists to generate data iterators 132 | ''' 133 | train_dataiter1 = get_imRecordIter( 134 | '%s_even' % train, input_shape, batch_size, 135 | kv, shuffle=args.even_iter, aug=True, even_iter=args.even_iter) 136 | train_dataiter2 = get_imRecordIter( 137 | '%s_rand' % train, input_shape, batch_size, 138 | kv, shuffle=True, aug=True) 139 | val_dataiter1 = get_imRecordIter( 140 | '%s_even' % test, input_shape, batch_size, 141 | kv, shuffle=False, aug=False, even_iter=args.even_iter) 142 | val_dataiter2 = get_imRecordIter( 143 | '%s_rand' % test, input_shape, batch_size, 144 | kv, shuffle=False, aug=False) 145 | 146 | return verifi_iterator( 147 | train_dataiter1, train_dataiter2, use_verifi=args.verifi, use_center=args.center, 
use_lsoftmax=args.lsoftmax, gpus=gpus), \ 148 | verifi_iterator( 149 | val_dataiter1, val_dataiter2, use_verifi=args.verifi, use_center=args.center, use_lsoftmax=args.lsoftmax, gpus=gpus) 150 | 151 | 152 | def parse_args(): 153 | parser = argparse.ArgumentParser( 154 | description='single domain car recog training') 155 | parser.add_argument('--gpus', type=str, default='5', 156 | help='the gpus will be used, e.g "0,1"') 157 | parser.add_argument('--data-dir', type=str, 158 | default="/data3/matt/iLIDS-VID/recs", 159 | help='data directory') 160 | parser.add_argument('--num-examples', type=int, default=20000, 161 | help='the number of training examples') 162 | parser.add_argument('--num-id', type=int, default=150, 163 | help='the number of training ids') 164 | parser.add_argument('--batch-size', type=int, default=4, 165 | help='the batch size') 166 | parser.add_argument('--lr', type=float, default=1e-2, 167 | help='the initial learning rate') 168 | parser.add_argument('--num-epoches', type=int, default=1, 169 | help='the number of training epochs') 170 | parser.add_argument('--mode', type=str, default='ilds_baseline_b4', 171 | help='save names of model and log') 172 | parser.add_argument('--lsoftmax', action='store_true', default=False, 173 | help='if use large margin softmax') 174 | parser.add_argument('--verifi-label', action='store_true', default=False, 175 | help='if add verifi label') 176 | parser.add_argument('--verifi', action='store_true', default=False, 177 | help='if use verifi loss') 178 | parser.add_argument('--triplet', action='store_true', default=False, 179 | help='if use triplet loss') 180 | parser.add_argument('--lmnn', action='store_true', default=True, 181 | help='if use LMNN loss') 182 | parser.add_argument('--center', action='store_true', default=False, 183 | help='if use center loss') 184 | parser.add_argument('--verifi-threshd', type=float, default=0.9, 185 | help='verification threshold') 186 | parser.add_argument('--triplet-threshd', type=float, default=0.9, 187 | help='triplet threshold') 188 | parser.add_argument('--train-file', type=str, default="image_train", 189 | help='train file') 190 | parser.add_argument('--test-file', type=str, default="image_valid", 191 | help='test file') 192 | parser.add_argument('--kv-store', type=str, 193 | default='device', help='the kvstore type') 194 | parser.add_argument('--network', type=str, 195 | default='inception-bn', help='network name') 196 | parser.add_argument('--model-load-epoch', type=int, default=126, 197 | help='load the model on an epoch using the model-load-prefix') 198 | parser.add_argument('--model-load-prefix', type=str, default='inception-bn', 199 | help='load model prefix') 200 | parser.add_argument('--even-iter', action='store_true', default=False, 201 | help='if use even iterator') 202 | return parser.parse_args() 203 | 204 | 205 | def load_checkpoint(prefix, epoch): 206 | # symbol = sym.load('%s-symbol.json' % prefix) 207 | save_dict = mx.nd.load('%s-%04d.params' % (prefix, epoch)) 208 | arg_params = {} 209 | aux_params = {} 210 | for k, v in save_dict.items(): 211 | tp, name = k.split(':', 1) 212 | if tp == 'arg': 213 | arg_params[name] = v 214 | if tp == 'aux': 215 | aux_params[name] = v 216 | return (arg_params, aux_params) 217 | 218 | 219 | args = parse_args() 220 | 221 | print args 222 | batch_size = args.batch_size 223 | num_epoch = args.num_epoches 224 | devices = [mx.gpu(int(i)) for i in args.gpus.split(',')] 225 | lr = args.lr 226 | num_images = args.num_examples 227 | 228 | 229 | arg_params, 
aux_params = load_checkpoint( 230 | 'models/%s' % args.model_load_prefix, args.model_load_epoch) 231 | 232 | symbol = importlib.import_module( 233 | 'symbol_' + args.network).get_symbol() 234 | 235 | #batchsize4center=batch_size / len(devices) 236 | net = build_network(symbol, num_id=args.num_id, batchsize= batch_size) 237 | 238 | kv = mx.kvstore.create(args.kv_store) 239 | train, val = get_iterators( 240 | batch_size=batch_size, input_shape=(3, 224, 112), 241 | train=args.train_file, test=args.test_file, kv=kv, gpus=len(devices)) 242 | print train.batch_size 243 | #train = get_imRecordIter(args.train_file, (3, 224, 112), batch_size, kv) 244 | #val = get_imRecordIter(args.test_file, (3, 224, 112), batch_size, kv) 245 | 246 | stepPerEpoch = int(num_images * 2 / batch_size) 247 | lr_scheduler = mx.lr_scheduler.MultiFactorScheduler( 248 | step=[stepPerEpoch * x for x in [50, 75]], factor=0.1) 249 | init = mx.initializer.Xavier( 250 | rnd_type='gaussian', factor_type='in', magnitude=2) 251 | 252 | arg_names = net.list_arguments() 253 | sgd = SGD(learning_rate=args.lr, momentum=0.9, 254 | wd=0.0005, clip_gradient=10, lr_scheduler=lr_scheduler, 255 | rescale_grad=1.0 / batch_size) 256 | 257 | 258 | logging.basicConfig(filename='log/%s.log' % args.mode, level=logging.DEBUG) 259 | logger = logging.getLogger() 260 | logger.setLevel(logging.DEBUG) 261 | logging.info(args) 262 | 263 | print ('begining of mx.model.feedforward') 264 | 265 | model = mx.model.FeedForward( 266 | symbol=net, ctx=devices, num_epoch=num_epoch, arg_params=arg_params, 267 | aux_params=aux_params, initializer=init, optimizer=sgd) 268 | 269 | prefix = 'models/%s' % args.mode 270 | num = 1 271 | if args.verifi: 272 | num += 1 273 | if args.triplet: 274 | num += 1 275 | if args.lmnn: 276 | num += 1 277 | if args.center: 278 | num += 1 279 | 280 | 281 | eval_metric=Multi_Metric(num=num, cls=1) 282 | epoch_end_callback=mx.callback.do_checkpoint(prefix) 283 | batch_end_callback=mx.callback.Speedometer(batch_size=batch_size) 284 | print ('begining of model.fit') 285 | model.fit(X=train, eval_data=val, eval_metric=eval_metric, logger=logger, epoch_end_callback=epoch_end_callback, batch_end_callback=batch_end_callback) 286 | print('done') -------------------------------------------------------------------------------- /baseline/lsoftmax.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import mxnet as mx 4 | import numpy as np 5 | 6 | 7 | # MXNET_CPU_WORKER_NTHREADS must be greater than 1 for custom op to work on CPU 8 | os.environ['MXNET_CPU_WORKER_NTHREADS'] = '2' 9 | 10 | 11 | class LSoftmaxOp(mx.operator.CustomOp): 12 | '''LSoftmax from 13 | ''' 14 | 15 | def __init__(self, margin, beta, beta_min, scale): 16 | self.margin = int(margin) 17 | self.beta = float(beta) 18 | self.beta_min = float(beta_min) 19 | self.scale = float(scale) 20 | self.c_map = [] 21 | self.k_map = [] 22 | c_m_n = lambda m, n: math.factorial(n) / math.factorial(m) / math.factorial(n-m) 23 | for i in range(margin+1): 24 | self.c_map.append(c_m_n(i, margin)) 25 | self.k_map.append(math.cos(i * math.pi / margin)) 26 | 27 | def find_k(self, cos_t): 28 | '''find k for cos(theta) 29 | ''' 30 | # for numeric issue 31 | eps = 1e-5 32 | le = lambda x, y: x < y or abs(x-y) < eps 33 | for i in range(self.margin): 34 | if le(self.k_map[i+1], cos_t) and le(cos_t, self.k_map[i]): 35 | return i 36 | raise ValueError('can not find k for cos_t = %f'%cos_t) 37 | 38 | def calc_cos_mt(self, cos_t): 39 | 
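# Binomial expansion used below:
#   cos(m*t) = sum_p (-1)^p * C(m, 2p) * cos(t)^(m-2p) * sin(t)^(2p),
# e.g. for margin m = 4: cos(4t) = cos(t)^4 - 6*cos(t)^2*sin(t)^2 + sin(t)^4.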
'''calculate cos(m*theta) 40 | ''' 41 | cos_mt = 0 42 | sin2_t = 1 - cos_t * cos_t 43 | flag = -1 44 | for p in range(self.margin / 2 + 1): 45 | flag *= -1 46 | cos_mt += flag * self.c_map[2*p] * pow(cos_t, self.margin-2*p) * pow(sin2_t, p) 47 | return cos_mt 48 | 49 | def forward(self, is_train, req, in_data, out_data, aux): 50 | assert len(in_data) == 3 51 | assert len(out_data) == 1 52 | assert len(req) == 1 53 | x, label, w = in_data 54 | x = x.asnumpy() 55 | w = w.asnumpy() 56 | label = label.asnumpy() 57 | #print "lsoftmax label", label 58 | eps= 1e-5 59 | # original fully connected 60 | out = x.dot(w.T) 61 | if is_train: 62 | # large margin fully connected 63 | n = label.shape[0] 64 | w_norm = np.linalg.norm(w, axis=1) 65 | x_norm = np.linalg.norm(x, axis=1) 66 | for i in range(n): 67 | j = yi = int(label[i]) 68 | f = out[i, yi] 69 | cos_t = f / (w_norm[yi] * x_norm[i]+eps) 70 | # calc k and cos_mt 71 | k = self.find_k(cos_t) 72 | cos_mt = self.calc_cos_mt(cos_t) 73 | # f_i_j = (\beta * f_i_j + fo_i_j) / (1 + \beta) 74 | fo_i_j = f 75 | f_i_j = (pow(-1, k) * cos_mt - 2*k) * (w_norm[yi] * x_norm[i]) 76 | #print j,yi,cos_t,k,cos_mt,fo_i_j,f_i_j 77 | out[i, yi] = (f_i_j + self.beta * fo_i_j) / (1 + self.beta) 78 | self.assign(out_data[0], req[0], mx.nd.array(out)) 79 | 80 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 81 | assert len(in_data) == 3 82 | assert len(out_grad) == 1 83 | assert len(in_grad) == 3 84 | assert len(req) == 3 85 | x, label, w = in_data 86 | x = x.asnumpy() 87 | w = w.asnumpy() 88 | label = label.asnumpy() 89 | o_grad = out_grad[0].asnumpy() 90 | # original fully connected 91 | x_grad = o_grad.dot(w) 92 | w_grad = o_grad.T.dot(x) 93 | # large margin fully connected 94 | n = label.shape[0] # batch size 95 | m = w.shape[0] # number of classes 96 | margin = self.margin # margin 97 | feature_dim = w.shape[1] # feature dimension 98 | cos_t = np.zeros(n, dtype=np.float32) # cos(theta) 99 | cos_mt = np.zeros(n, dtype=np.float32) # cos(margin * theta) 100 | sin2_t = np.zeros(n, dtype=np.float32) # sin(theta) ^ 2 101 | fo = np.zeros(n, dtype=np.float32) # fo_i = dot(x_i, w_yi) 102 | k = np.zeros(n, dtype=np.int32) 103 | x_norm = np.linalg.norm(x, axis=1) 104 | w_norm = np.linalg.norm(w, axis=1) 105 | eps=1e-5 106 | for i in range(n): 107 | j = yi = int(label[i]) 108 | f = w[yi].dot(x[i]) 109 | cos_t[i] = f / (w_norm[yi] * x_norm[i]+eps) 110 | k[i] = self.find_k(cos_t[i]) 111 | cos_mt[i] = self.calc_cos_mt(cos_t[i]) 112 | sin2_t[i] = 1 - cos_t[i]*cos_t[i] 113 | fo[i] = f 114 | # gradient w.r.t. x_i 115 | for i in range(n): 116 | # df / dx at x = x_i, w = w_yi 117 | j = yi = int(label[i]) 118 | dcos_dx = w[yi] / (w_norm[yi]*x_norm[i]+eps) - x[i] * fo[i] / (w_norm[yi]*pow(x_norm[i], 3)+eps) 119 | dsin2_dx = -2 * cos_t[i] * dcos_dx 120 | dcosm_dx = margin*pow(cos_t[i], margin-1) * dcos_dx # p = 0 121 | flag = 1 122 | for p in range(1, margin / 2 + 1): 123 | flag *= -1 124 | dcosm_dx += flag * self.c_map[2*p] * ( \ 125 | p*pow(cos_t[i], margin-2*p)*pow(sin2_t[i], p-1)*dsin2_dx + \ 126 | (margin-2*p)*pow(cos_t[i], margin-2*p-1)*pow(sin2_t[i], p)*dcos_dx) 127 | df_dx = (pow(-1, k[i]) * cos_mt[i] - 2*k[i]) * w_norm[yi] / (x_norm[i]+eps) * x[i] + \ 128 | pow(-1, k[i]) * w_norm[yi] * x_norm[i] * dcosm_dx 129 | alpha = 1 / (1 + self.beta) 130 | x_grad[i] += alpha * o_grad[i, yi] * (df_dx - w[yi]) 131 | # gradient w.r.t. 
w_j 132 | for j in range(m): 133 | dw = np.zeros(feature_dim, dtype=np.float32) 134 | for i in range(n): 135 | yi = int(label[i]) 136 | if yi == j: 137 | # df / dw at x = x_i, w = w_yi and yi == j 138 | dcos_dw = x[i] / (w_norm[yi]*x_norm[i]+eps) - w[yi] * fo[i] / (x_norm[i]*pow(w_norm[yi], 3)+eps) 139 | dsin2_dw = -2 * cos_t[i] * dcos_dw 140 | dcosm_dw = margin*pow(cos_t[i], margin-1) * dcos_dw # p = 0 141 | flag = 1 142 | for p in range(1, margin / 2 + 1): 143 | flag *= -1 144 | dcosm_dw += flag * self.c_map[2*p] * ( \ 145 | p*pow(cos_t[i], margin-2*p)*pow(sin2_t[i], p-1)*dsin2_dw + \ 146 | (margin-2*p)*pow(cos_t[i], margin-2*p-1)*pow(sin2_t[i], p)*dcos_dw) 147 | df_dw_j = (pow(-1, k[i]) * cos_mt[i] - 2*k[i]) * x_norm[i] / (w_norm[yi]+eps) * w[yi] + \ 148 | pow(-1, k[i]) * w_norm[yi] * x_norm[i] * dcosm_dw 149 | dw += o_grad[i, yi] * (df_dw_j - x[i]) 150 | alpha = 1 / (1 + self.beta) 151 | w_grad[j] += alpha * dw 152 | #x_grad[:], w_grad[:] = 0, 0 153 | self.assign(in_grad[0], req[0], mx.nd.array(x_grad)) 154 | self.assign(in_grad[2], req[2], mx.nd.array(w_grad)) 155 | # dirty hack, should also work for multi devices 156 | self.beta *= self.scale 157 | self.beta = max(self.beta, self.beta_min) 158 | 159 | 160 | @mx.operator.register("LSoftmax") 161 | class LSoftmaxProp(mx.operator.CustomOpProp): 162 | 163 | def __init__(self, num_hidden, beta, margin, scale=1, beta_min=0): 164 | super(LSoftmaxProp, self).__init__(need_top_grad=True) 165 | self.margin = int(margin) 166 | self.num_hidden = int(num_hidden) 167 | self.beta = float(beta) 168 | self.beta_min = float(beta_min) 169 | self.scale = float(scale) 170 | 171 | def list_arguments(self): 172 | return ['data', 'label', 'weight'] 173 | 174 | def list_outputs(self): 175 | return ['output'] 176 | 177 | def infer_shape(self, in_shape): 178 | #print in_shape 179 | assert len(in_shape) == 3, "LSoftmaxOp input data: [data, label, weight]" 180 | dshape = in_shape[0] 181 | lshape = in_shape[1] 182 | assert len(dshape) == 2, "data shape should be (batch_size, feature_dim)" 183 | assert len(lshape) == 1, "label shape should be (batch_size,)" 184 | wshape = (self.num_hidden, dshape[1]) 185 | oshape = (dshape[0], self.num_hidden) 186 | return [dshape, lshape, wshape], [oshape,], [] 187 | 188 | def create_operator(self, ctx, shapes, dtypes): 189 | return LSoftmaxOp(margin=self.margin, beta=self.beta, beta_min=self.beta_min, scale=self.scale) 190 | 191 | 192 | def test_op(): 193 | """test LSoftmax Operator 194 | """ 195 | # build symbol 196 | batch_size = cmd_args.batch_size 197 | embedding_dim = cmd_args.embedding_dim 198 | num_classes = cmd_args.num_classes 199 | data = mx.sym.Variable('data') 200 | label = mx.sym.Variable('label') 201 | weight = mx.sym.Variable('weight') 202 | args = { 203 | 'data': np.random.normal(0, 1, (batch_size, embedding_dim)), 204 | 'weight': np.random.normal(0, 1, (num_classes, embedding_dim)), 205 | 'label': np.random.choice(num_classes, batch_size), 206 | } 207 | 208 | if cmd_args.op_impl == 'py': 209 | symbol = mx.sym.Custom(data=data, label=label, weight=weight, num_hidden=10, 210 | beta=cmd_args.beta, margin=cmd_args.margin, scale=cmd_args.scale, 211 | op_type='LSoftmax', name='lsoftmax') 212 | else: 213 | symbol = mx.sym.LSoftmax(data=data, label=label, weight=weight, num_hidden=num_classes, 214 | margin=cmd_args.margin, beta=cmd_args.beta, scale=cmd_args.scale, 215 | name='lsoftmax') 216 | 217 | data_shape = (batch_size, embedding_dim) 218 | label_shape = (batch_size,) 219 | weight_shape = (num_classes, 
embedding_dim) 220 | ctx = mx.cpu() if cmd_args.op_impl == 'py' else mx.gpu() 221 | executor = symbol.simple_bind(ctx=ctx, data=data_shape, label=label_shape, weight=weight_shape) 222 | 223 | def forward(data, label, weight): 224 | data = mx.nd.array(data, ctx=ctx) 225 | label = mx.nd.array(label, ctx=ctx) 226 | weight = mx.nd.array(weight, ctx=ctx) 227 | executor.forward(is_train=True, data=data, label=label, weight=weight) 228 | return executor.output_dict['lsoftmax_output'].asnumpy() 229 | 230 | def backward(out_grad): 231 | executor.backward(out_grads=[mx.nd.array(out_grad, ctx=ctx)]) 232 | return executor.grad_dict 233 | 234 | def gradient_check(name, i, j): 235 | '''gradient check on x[i, j] 236 | ''' 237 | eps = 1e-4 238 | threshold = 1e-2 239 | reldiff = lambda a, b: abs(a-b) / (abs(a) + abs(b)) 240 | # calculate by backward 241 | output = forward(data=args['data'], weight=args['weight'], label=args['label']) 242 | grad_dict = backward(output) 243 | grad = grad_dict[name].asnumpy()[i, j] 244 | # calculate by \delta f / 2 * eps 245 | loss = lambda x: np.square(x).sum() / 2 246 | args[name][i, j] -= eps 247 | loss1 = loss(forward(data=args['data'], weight=args['weight'], label=args['label'])) 248 | args[name][i, j] += 2 * eps 249 | loss2 = loss(forward(data=args['data'], weight=args['weight'], label=args['label'])) 250 | grad_expect = (loss2 - loss1) / (2 * eps) 251 | # check 252 | rel_err = reldiff(grad_expect, grad) 253 | if rel_err > threshold: 254 | print 'gradient check failed' 255 | print 'expected %lf given %lf, relative error %lf'%(grad_expect, grad, rel_err) 256 | return False 257 | else: 258 | print 'gradient check pass' 259 | return True 260 | 261 | # test forward 262 | output = forward(data=args['data'], weight=args['weight'], label=args['label']) 263 | diff = args['data'].dot(args['weight'].T) - output 264 | 265 | # test backward 266 | # gradient check on data 267 | data_gc_pass = 0 268 | for i in range(args['data'].shape[0]): 269 | for j in range(args['data'].shape[1]): 270 | print 'gradient check on data[%d, %d]'%(i, j) 271 | if gradient_check('data', i, j): 272 | data_gc_pass += 1 273 | # gradient check on weight 274 | weight_gc_pass = 0 275 | for i in range(args['weight'].shape[0]): 276 | for j in range(args['weight'].shape[1]): 277 | print 'gradient check on weight[%d, %d]'%(i, j) 278 | if gradient_check('weight', i, j): 279 | weight_gc_pass += 1 280 | print '===== Summary =====' 281 | print 'gradient on data pass ratio is %lf'%(float(data_gc_pass) / args['data'].size) 282 | print 'gradient on weight pass ratio is %lf'%(float(weight_gc_pass) / args['weight'].size) 283 | 284 | 285 | if __name__ == '__main__': 286 | import argparse 287 | 288 | parser = argparse.ArgumentParser() 289 | parser.add_argument('--batch-size', type=int, default=32, help="test batch size") 290 | parser.add_argument('--num-classes', type=int, default=10, help="test number of classes") 291 | parser.add_argument('--embedding-dim', type=int, default=3, help="test embedding dimension") 292 | parser.add_argument('--margin', type=int, default=2, help="test lsoftmax margin") 293 | parser.add_argument('--beta', type=float, default=10, help="test lsoftmax beta") 294 | parser.add_argument('--scale', type=float, default=1, help="beta scale of every mini-batch") 295 | parser.add_argument('--op-impl', type=str, choices=['py', 'cpp'], default='py', help="test op implementation") 296 | cmd_args = parser.parse_args() 297 | print cmd_args 298 | 299 | # check 300 | if cmd_args.op_impl == 'cpp': 301 | try: 302 | 
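# Probe for the fused C++ operator; if MXNet was built without it, the test
# is skipped gracefully instead of raising an AttributeError.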
op_creator = mx.sym.LSoftmax 303 | except AttributeError: 304 | print 'No cpp operator for LSoftmax, Skip test' 305 | import sys 306 | sys.exit(0) 307 | 308 | test_op() 309 | -------------------------------------------------------------------------------- /baseline/loss_layers.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import numpy as np 3 | 4 | 5 | class VerfiLoss(mx.operator.CustomOp): 6 | ''' 7 | Verfication Loss Layer 8 | ''' 9 | def __init__(self, grad_scale, threshd): 10 | self.grad_scale = grad_scale 11 | self.threshd = threshd 12 | self.eps = 1e-5 13 | 14 | def forward(self, is_train, req, in_data, out_data, aux): 15 | # print "forward" 16 | x = in_data[0] 17 | label = in_data[1].asnumpy() 18 | #print "verifi label", label 19 | n = x.shape[0] 20 | ctx = x.context 21 | # y = out_data[0] 22 | # y[:] = 0 23 | # print y.shape 24 | y = np.zeros((x.shape[0], )) 25 | #y = mx.nd.array((n, ), ctx=ctx) 26 | for i in range(x.shape[0]): 27 | #print "forward", i 28 | mask = np.zeros((n, )) 29 | if i<(x.shape[0]/2): 30 | pid = i + 1 if i % 2 == 0 else i - 1 31 | mask[i] = 1 32 | mask[pid] = 1 33 | #mask[np.where(label == label[i])] = 1 34 | #print mask 35 | pos = np.sum(mask) 36 | mask = mx.nd.array(mask, ctx=ctx) 37 | diff = x[i] - x 38 | d = mx.nd.sqrt(mx.nd.sum(diff * diff, axis=1)) 39 | d1 = mx.nd.maximum(0, self.threshd - d) 40 | z = mx.nd.sum(mask * d * d) / (pos + self.eps) \ 41 | + mx.nd.sum((1 - mask) * d1 * d1) / (n - pos + self.eps) 42 | y[i] = z.asnumpy()[0] 43 | 44 | # y /= x.shape[0] 45 | self.assign(out_data[0], req[0], mx.nd.array(y, ctx=ctx)) 46 | 47 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 48 | # print "backward" 49 | x = in_data[0] 50 | #label = in_data[1].asnumpy() 51 | n = x.shape[0] 52 | ctx = x.context 53 | grad = in_grad[0] 54 | grad[:] = 0 55 | for i in range(x.shape[0]): 56 | mask = np.zeros((1, n)) 57 | #mask[np.where(label == label[i])] = 1 58 | if i<(x.shape[0]/2): 59 | pid = i + 1 if i % 2 == 0 else i - 1 60 | mask[0,i] = 1 61 | mask[0,pid] = 1 62 | pos = np.sum(mask) 63 | mask = mx.nd.array(mask, ctx=ctx) 64 | diff = x[i] - x 65 | d = mx.nd.sqrt(mx.nd.sum(diff * diff, axis=1)) 66 | g1 = mx.nd.minimum(0, (d - self.threshd) / (d + self.eps)) 67 | z = mx.nd.dot((1 - mask) * g1.reshape([1, n]), diff)[0] 68 | # print grad[i].shape, z.shape 69 | # grad[i] = z 70 | # print "z" 71 | grad[i] = mx.nd.dot(mask, diff)[0] / (pos + self.eps)\ 72 | + mx.nd.dot((1 - mask) * g1.reshape([1, n]), diff)[0] / (n - pos + self.eps) 73 | 74 | grad *= self.grad_scale 75 | 76 | 77 | 78 | @mx.operator.register("verifiLoss") 79 | class VerifiLossProp(mx.operator.CustomOpProp): 80 | def __init__(self, grad_scale=1.0, threshd=0.5): 81 | super(VerifiLossProp, self).__init__(need_top_grad=False) 82 | self.grad_scale = float(grad_scale) 83 | self.threshd = float(threshd) 84 | 85 | def list_arguments(self): 86 | return ['data', 'label'] 87 | 88 | def list_outputs(self): 89 | return ['output'] 90 | 91 | def infer_shape(self, in_shape): 92 | data_shape = in_shape[0] 93 | label_shape = (in_shape[0][0], ) 94 | output_shape = (in_shape[0][0], ) 95 | return [data_shape, label_shape], [output_shape] 96 | 97 | def create_operator(self, ctx, shapes, dtypes): 98 | return VerfiLoss(self.grad_scale, self.threshd) 99 | 100 | 101 | class TripletLoss(mx.operator.CustomOp): 102 | ''' 103 | Triplet loss layer 104 | ''' 105 | def __init__(self, grad_scale=1.0, threshd=0.5): 106 | self.grad_scale = grad_scale 107 | 
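# threshd is the triplet margin: forward() computes, per anchor i,
#   max(0, ||x_i - x_pos||^2 - ||x_i - x_neg||^2 + threshd).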
self.threshd = threshd 108 | 109 | def forward(self, is_train, req, in_data, out_data, aux): 110 | x = in_data[0] 111 | y = np.zeros((x.shape[0], )) 112 | ctx = x.context 113 | for i in range(x.shape[0] / 2): 114 | pid = i + 1 if i % 2 == 0 else i - 1 115 | nid = i + int(x.shape[0] / 2) 116 | pdiff = x[i] - x[pid] 117 | ndiff = x[i] - x[nid] 118 | y[i] = mx.nd.sum(pdiff * pdiff).asnumpy()[0] -\ 119 | mx.nd.sum(ndiff * ndiff).asnumpy()[0] + self.threshd 120 | if y[i] < 0: 121 | y[i] = 0 122 | # y /= x.shape[0] 123 | self.assign(out_data[0], req[0], mx.nd.array(y, ctx=ctx)) 124 | 125 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 126 | x = in_data[0] 127 | y = out_data[0] 128 | grad = in_grad[0] 129 | grad[:] = 0 130 | for i in range(x.shape[0] / 2): 131 | pid = i + 1 if i % 2 == 0 else i - 1 132 | nid = i + int(x.shape[0] / 2) 133 | 134 | if y[i] > 0: 135 | grad[i] += x[nid] - x[pid] 136 | grad[pid] += x[pid] - x[i] 137 | grad[nid] += x[i] - x[nid] 138 | 139 | grad *= self.grad_scale 140 | 141 | 142 | 143 | @mx.operator.register("tripletLoss") 144 | class TripletLossProp(mx.operator.CustomOpProp): 145 | def __init__(self, grad_scale=1.0, threshd=0.5): 146 | super(TripletLossProp, self).__init__(need_top_grad=False) 147 | self.grad_scale = float(grad_scale) 148 | self.threshd = float(threshd) 149 | 150 | def list_arguments(self): 151 | return ['data'] 152 | 153 | def list_outputs(self): 154 | return ['output'] 155 | 156 | def infer_shape(self, in_shape): 157 | data_shape = in_shape[0] 158 | # label_shape = (in_shape[0][0], ) 159 | output_shape = (in_shape[0][0], ) 160 | return [data_shape], [output_shape] 161 | 162 | def create_operator(self, ctx, shapes, dtypes): 163 | return TripletLoss(self.grad_scale, self.threshd) 164 | 165 | 166 | class CenterLoss(mx.operator.CustomOp): 167 | def __init__(self, ctx, shapes, dtypes, num_class, alpha, scale=1.0): 168 | if not len(shapes[0]) ==2: 169 | raise ValuerError('dim for input_data should be 2 for CenterLoss') 170 | 171 | self.alpha = alpha 172 | self.batch_size = shapes[0][0] 173 | self.num_class = num_class 174 | self.scale = scale 175 | 176 | def forward(self, is_train, req, in_data, out_data, aux): 177 | x=in_data[0] 178 | labels = in_data[1].asnumpy() 179 | #print "center label", labels 180 | diff = aux [0] 181 | center = aux[1] 182 | #loss=np.zeros((self.batch_size,1)) 183 | 184 | for i in range(self.batch_size): 185 | diff[i] = in_data[0][i] - center[int(labels[i])] 186 | 187 | loss = mx.nd.sum(mx.nd.square(diff),axis=1) / self.batch_size /2 188 | self.assign(out_data[0], req[0], loss) 189 | 190 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 191 | diff = aux[0] 192 | center = aux[1] 193 | sum_ = aux[2] 194 | 195 | grad_scale = float(self.scale/self.batch_size) 196 | self.assign(in_grad[0], req[0], diff * grad_scale) 197 | 198 | 199 | #update the center 200 | labels = in_data[1].asnumpy() 201 | label_occur = dict() 202 | for i, label in enumerate(labels): 203 | label_occur.setdefault(int(label), []).append(i) 204 | 205 | for label, sample_index in label_occur.items(): 206 | sum_[:] = 0 207 | for i in sample_index: 208 | sum_ = sum_ + diff[i] 209 | delta_c = sum_ /(1+len(sample_index)) 210 | center[label] += self.alpha * delta_c 211 | 212 | @mx.operator.register("centerLoss") 213 | class CenterLossProp(mx.operator.CustomOpProp): 214 | def __init__(self, num_class, alpha, scale=1.0, batchsize=32): 215 | super(CenterLossProp, self).__init__(need_top_grad=False) 216 | 217 | self.num_class = 
int(num_class) 218 | self.alpha = float(alpha) 219 | self.scale = float(scale) 220 | self.batchsize = int(batchsize) 221 | 222 | def list_arguments(self): 223 | return ['data', 'label'] 224 | 225 | def list_outputs(self): 226 | return ['output'] 227 | 228 | def list_auxiliary_states(self): 229 | return ['diff_bias', 'center_bias', 'sum_bias'] 230 | 231 | def infer_shape(self, in_shape): 232 | data_shape = in_shape[0] 233 | label_shape = (in_shape[0][0], ) 234 | 235 | #store diff, same shape as input batch 236 | diff_shape = [self.batchsize, data_shape[1]] 237 | 238 | #store the center of each clss, should be (num_class, d) 239 | center_shape = [self.num_class, diff_shape[1]] 240 | 241 | #computation buf 242 | sum_shape = [diff_shape[1], ] 243 | 244 | output_shape = (in_shape[0][0], ) 245 | 246 | return [data_shape, label_shape], [output_shape], [diff_shape, center_shape, sum_shape] 247 | 248 | def create_operator(self, ctx, shapes, dtypes): 249 | return CenterLoss(ctx, shapes, dtypes, self.num_class, self.alpha, self.scale) 250 | 251 | 252 | 253 | class lmnnLoss(mx.operator.CustomOp): 254 | ''' 255 | LMNN Loss Layer = positive pairwise loss + triplet loss 256 | ''' 257 | def __init__(self, epsilon, threshd): 258 | self.epsilon= epsilon #epsilon is the trade-off parameter between positive pairwise and triplet loss(1: epsilon) 259 | self.threshd = threshd 260 | #self.pnr = pnr 261 | 262 | def forward(self, is_train, req, in_data, out_data, aux): 263 | # print "forward" 264 | x = in_data[0] 265 | #label=in_data[1].asnumpy() 266 | ctx = x.context 267 | y = mx.nd.zeros((x.shape[0], ), ctx=ctx) 268 | halfsize = x.shape[0]/2 269 | for i in range(halfsize): 270 | pid = i + 1 if i % 2 == 0 else i - 1 271 | pdiff = x[i] - x[pid] 272 | pdist = 0.5*mx.nd.sum(pdiff * pdiff) 273 | mask = np.ones((x.shape[0],)) #index mask for negative examples 274 | mask[i] = 0 275 | mask[pid] = 0 276 | mask = mx.nd.array(mask, ctx=ctx) 277 | ndiff = x[i] - x 278 | ndist = 0.5*mx.nd.sum(ndiff*ndiff,axis=1) 279 | distdiff = (pdist - ndist +self.threshd)*mask 280 | distdiff = mx.nd.sum(mx.nd.maximum(0, distdiff))/mx.nd.sum(mask) 281 | y[i] = pdist+self.epsilon*distdiff 282 | 283 | self.assign(out_data[0], req[0], y) 284 | 285 | 286 | def backward(self, req, out_grad, in_data, out_data, in_grad, aux): 287 | # print "backward" 288 | x = in_data[0] 289 | #label = in_data[1].asnumpy() 290 | ctx = x.context 291 | grad = in_grad[0] 292 | grad[:] = 0 293 | batchsize = x.shape[0] 294 | #label = in_data[1] 295 | #xhalf=x[halfsize:x.shape[0]] 296 | 297 | for i in range(batchsize/2): 298 | #print "gradient computation", i 299 | pid = i + 1 if i % 2 == 0 else i - 1 300 | grad[i] += x[i] - x[pid] 301 | grad[pid] += x[pid] - x[i] 302 | 303 | #pnr_index = np.random.binomial(n=1, p=self.pnr/batchsize, size=batchsize) 304 | #print pnr_index 305 | mask = np.ones((batchsize,)) #index mask for negative examples 306 | mask[i] = 0 307 | mask[pid] = 0 308 | #mask=mask * pnr_index 309 | #print mask 310 | 311 | pdiff = x[i] - x[pid] 312 | pdist = 0.5 * mx.nd.sum(pdiff * pdiff) 313 | ndiff = x[i] - x 314 | ndist = 0.5 * mx.nd.sum(ndiff * ndiff,axis=1) 315 | distdiff = pdist - ndist + self.threshd 316 | 317 | index = np.zeros((batchsize, )) 318 | index[np.where(distdiff.asnumpy()>0)]=1 319 | index=index * mask 320 | index=mx.nd.array(index,ctx=ctx) 321 | #print index 322 | 323 | ratio = distdiff * index / (mx.nd.sum(distdiff * index)+1e-5) 324 | ratio = mx.nd.Reshape(ratio, shape=(batchsize,1)) 325 | #print ratio.asnumpy() 326 | ratio = 
mx.nd.broadcast_axis(ratio, axis=1, size=x.shape[1]) 327 | #print ratio.asnumpy() 328 | 329 | grad[i] += mx.nd.sum((x-x[pid]) * ratio, axis=0) * self.epsilon 330 | grad[pid] += (x[pid]-x[i]) * self.epsilon * (mx.nd.sum(distdiff * index)/(mx.nd.sum(distdiff * index)+1e-5)) 331 | grad += (x[i]-x) * ratio * self.epsilon 332 | 333 | self.assign(in_grad[0], req[0], grad) 334 | 335 | @mx.operator.register("lmnnLoss") 336 | class lmnnLossProp(mx.operator.CustomOpProp): 337 | def __init__(self, epsilon=1.0, threshd=0.5): 338 | super(lmnnLossProp, self).__init__(need_top_grad=False) 339 | self.epsilon = float(epsilon) 340 | self.threshd = float(threshd) 341 | #self.pnr = float(pnr) #positive examples:negetive examples=1:pnr 342 | 343 | def list_arguments(self): 344 | return ['data'] # 'label'] 345 | 346 | def list_outputs(self): 347 | return ['output'] 348 | 349 | def infer_shape(self, in_shape): 350 | data_shape = in_shape[0] 351 | #label_shape = (in_shape[0][0], ) 352 | output_shape = (in_shape[0][0], ) 353 | return [data_shape], [output_shape] 354 | 355 | def create_operator(self, ctx, shapes, dtypes): 356 | return lmnnLoss(self.epsilon, self.threshd) 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | -------------------------------------------------------------------------------- /RL/dqn_mars.py: -------------------------------------------------------------------------------- 1 | import sys 2 | #sys.path.insert(0, "mxnet/python/") 3 | import find_mxnet 4 | import mxnet as mx 5 | 6 | from tensorboard import SummaryWriter 7 | import logging 8 | import numpy as np 9 | import argparse 10 | import random 11 | import math 12 | 13 | from batch_provider_mars import BatchProvider 14 | from utils import get_imRecordIter 15 | from replay_memory import ReplayMemory 16 | from tb_system import TensorBoardSystem 17 | from agent import Agent 18 | import glob 19 | 20 | 21 | def parse_args(): 22 | parser = argparse.ArgumentParser( 23 | description='multishot recog training') 24 | parser.add_argument('--gpus', type=str, default='1', 25 | help='the gpus will be used, e.g "0,1"') 26 | parser.add_argument('--data-dir', type=str, 27 | default="/data3/matt/MARS", 28 | help='data directory') 29 | parser.add_argument('--num-examples', type=int, default=10000, 30 | help='the number of training examples') 31 | parser.add_argument('--num-id', type=int, default=624, 32 | help='the number of training ids') 33 | parser.add_argument('--batch-size', type=int, default=16, 34 | help='the batch size') 35 | parser.add_argument('--sample-size', type=int, default=4, 36 | help='sample frames from each video') 37 | parser.add_argument('--patch-size', type=int, default=4, 38 | help='size of single image patch from video') 39 | parser.add_argument('--lr', type=float, default=1e-2, 40 | help='the initial learning rate') 41 | parser.add_argument('--num-epoches', type=int, default=100, 42 | help='the number of training epochs') 43 | parser.add_argument('--mode', type=str, default='prid_video_match_%d-%d' % (4, 4), 44 | help='save names of model and log') 45 | parser.add_argument('--verifi-threshd', type=float, default=0.9 + 2.3, 46 | help='verification threshold') 47 | parser.add_argument('--kv-store', type=str, 48 | default='device', help='the kvstore type') 49 | parser.add_argument('--network', type=str, 50 | default='inception-bn', help='network name') 51 | parser.add_argument('--model-load-epoch', type=int, default=1, 52 | help='load the model on an epoch using the model-load-prefix') 53 | 
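# The prefix/epoch pair below presumably points at a checkpoint produced by
# the baseline training (e.g. mars_baseline_b4) that initialises the CNN
# before DQN training starts.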
parser.add_argument('--model-load-prefix', type=str, default='mars_baseline_b4', 54 | help='load model prefix') 55 | parser.add_argument('--q_duel', action='store_true', default=False, 56 | help='if use duel network') 57 | parser.add_argument('--q_double', action='store_true', default=False, 58 | help='if use double DQN') 59 | parser.add_argument('--q-weight', type=float, default=1.0, 60 | help='DQN loss weight') 61 | parser.add_argument('--q-gamma', type=float, default=0.99, 62 | help='DQN decay rate') 63 | parser.add_argument('--penalty', type=float, default=0.1, 64 | help='DQN unsure penalty rate') 65 | parser.add_argument('--ob-epochs', type=int, default=1, 66 | help='DQN observing epochs') 67 | parser.add_argument('--num_acts', type=int, default=3, 68 | help='number of actions') 69 | parser.add_argument('--acts_per_round', type=int, default=3, 70 | help='number of actions per round') 71 | parser.add_argument('--fix_gamma', action='store_true', default=False, 72 | help='if fix_gamma in bn') 73 | parser.add_argument('--fix_penalty', action='store_true', default=False, 74 | help='if fix penalty') 75 | parser.add_argument('--no_sim', action='store_true', default=False, 76 | help='if no sim net') 77 | parser.add_argument('--num_hidden', type=int, default=128, 78 | help='number of hidden neurons in Q learning fc layers') 79 | parser.add_argument('--target_freq', type=int, default=500, 80 | help='number of hidden neurons in Q learning fc layers') 81 | parser.add_argument('--tisr', type=int, default=1, 82 | help='time inverse lr step') 83 | parser.add_argument('--num_sim', type=int, default=128, 84 | help='number of hidden neurons in similarity network') 85 | parser.add_argument('--lr_step', type=str, default='100,200', 86 | help='number of epoches to shrink lr') 87 | parser.add_argument('--q_bn', action='store_true', default=False, 88 | help='if add bn in qnet') 89 | parser.add_argument('--maxout', action='store_true', default=False, 90 | help='if add maxout in qnet') 91 | parser.add_argument('--pr_alpha', type=float, default=0.6, 92 | help='prioritized-replay alpha') 93 | parser.add_argument('--pr_beta', type=float, default=0.4, 94 | help='prioritized-replay beta') 95 | parser.add_argument('--add_rewards', action='store_true', default=False, 96 | help='if add rewards for single agent') 97 | parser.add_argument('--epsilon', action='store_true', default=False, 98 | help='if epsilon learning') 99 | parser.add_argument('--pos_weight', type=float, default=1.0, 100 | help='positive rewards weight') 101 | parser.add_argument('--e2e', action='store_true', default=False, 102 | help='if e2e') 103 | parser.add_argument('--history', action='store_true', default=False, 104 | help='if use history') 105 | parser.add_argument('--optimizer', type=str, default='sgd', 106 | help='choose the optimizer in {sgd, adam, rms}') 107 | parser.add_argument('--memory_size', type=int, default=1000, 108 | help='memory buffer size') 109 | parser.add_argument('--final_epsilon', type=float, default=0.1, 110 | help='final epsilon for exploration') 111 | parser.add_argument('--exp_ratio', type=float, default=0.1, 112 | help='ratio for exploration in whole training process') 113 | parser.add_argument('--hinge', action='store_true', default=False, 114 | help='if use hinge loss') 115 | parser.add_argument('--train-set', type=str, default='image_valid', 116 | help='load model prefix') 117 | parser.add_argument('--valid-set', type=str, default='image_test', 118 | help='load model prefix') 119 | parser.add_argument('--min-states', 
type=int, default=4, 120 | help='minimum states for history') 121 | parser.add_argument('--min-imgs', type=int, default=1, 122 | help='minimum imgs for each state') 123 | parser.add_argument('--precomputed', action='store_true', default=False, 124 | help='if feature precomputed') 125 | parser.add_argument('--fusion', action='store_true', default=False, 126 | help='if use data fusion') 127 | parser.add_argument('--total-forward', action='store_true', default=False, 128 | help='if use data fusion') 129 | parser.add_argument('--verbose', action='store_true', default=False, 130 | help='if print debug info') 131 | parser.add_argument('--avg-dqn-k', type=int, default=5, 132 | help='number of target networks for avg-dqn') 133 | return parser.parse_args() 134 | 135 | 136 | args = parse_args() 137 | logging.basicConfig(filename='log/%s.log' % args.mode, level=logging.DEBUG) 138 | logger = logging.getLogger() 139 | logger.setLevel(logging.DEBUG) 140 | logging.info(args) 141 | logdir = './tblog/' + args.mode 142 | summary_writer = SummaryWriter(logdir) 143 | monitor_writer = SummaryWriter('./molog/' + args.mode) 144 | print args 145 | batch_size = args.batch_size 146 | num_epoch = args.num_epoches 147 | devices = [mx.gpu(int(i)) for i in args.gpus.split(',')] 148 | lr = args.lr 149 | 150 | agent = Agent(args, devices[0]) 151 | 152 | 153 | prefix = 'models/%s' % args.mode 154 | 155 | memory = ReplayMemory(replay_size=args.memory_size, alpha=args.pr_alpha) 156 | tbs_V = TensorBoardSystem('mars', summary_writer) 157 | 158 | 159 | def get_feat(dataset, is_train=False): 160 | ret = [] 161 | for i in xrange(1501): 162 | cur = [] 163 | for j in xrange(1, 7): 164 | images = glob.glob('%s/recs/%s/id_%d_%d*' % (args.data_dir, dataset, i, j)) 165 | if len(images) == 0: 166 | continue 167 | cam = [] 168 | for k in images: 169 | bs, flst = 0, open(k) 170 | for line in flst: 171 | bs += 1 172 | org_iter = get_imRecordIter( 173 | args, k[len(args.data_dir)+1:-4], (3, 224, 112), 1, 174 | shuffle=is_train, aug=is_train, even_iter=True) 175 | cam.append(org_iter) 176 | if len(cam) > 0: 177 | cur.append(cam) 178 | if len(cur) > 0: 179 | ret.append(cur) 180 | return ret 181 | 182 | trainF = get_feat('train', True) 183 | 184 | train = BatchProvider(trainF, True, args.sample_size, sample_ratio=0.5, need_feat=args.history) 185 | batch_size = args.batch_size 186 | N = args.num_id 187 | 188 | iterations = args.num_examples 189 | memory = ReplayMemory(replay_size=args.memory_size, alpha=args.pr_alpha) 190 | epsilon = 1.0 191 | final_epsilon = args.final_epsilon 192 | rand_ep, fix_ep = 0, int(args.num_epoches * args.exp_ratio) 193 | epsilon_shr = (epsilon - final_epsilon) / (fix_ep - rand_ep) / iterations 194 | max_penalty = 1 195 | 196 | for e in xrange(args.num_epoches): 197 | if args.verbose: 198 | print 'Epoch', e 199 | for batch in xrange(iterations): 200 | if args.verbose: 201 | print 'Epoch', e, 'batch', batch 202 | cur, a, b = train.provide() 203 | y = ((a %N) == (b % N)) 204 | data_batch = agent.wash_data(cur) 205 | Qvalue = agent.get_Qvalue(data_batch, use_target=False, is_train=False) 206 | if args.verbose: 207 | print 'forward', Qvalue 208 | qs = agent.Q.get_outputs()[1].asnumpy() 209 | print qs 210 | print qs.max(), qs.min(), qs.mean(), qs.std() 211 | Qvalue_softmax = mx.nd.SoftmaxActivation(mx.nd.array(Qvalue, ctx=devices[0]) / epsilon / 5).asnumpy() 212 | reward, action, i = [0 for _ in xrange(args.min_imgs)], [-1 for _ in xrange(args.min_imgs)], args.min_imgs 213 | while i < args.sample_size: 214 | if 
args.total_forward:
215 |                 if i + 1 < args.sample_size:
216 |                     k = 2
217 |                 else:
218 |                     Q_choice = np.argmax(Qvalue[i, :2]) if args.epsilon else np.random.choice(args.num_acts, 1, p=Qvalue_softmax[i, :2])[0]
219 |                     if random.random() <= epsilon and args.epsilon:
220 |                         k = random.randrange(2)
221 |                     else:
222 |                         k = Q_choice
223 |             else:
224 |                 Q_choice = np.argmax(Qvalue[i]) if args.epsilon else np.random.choice(args.num_acts, 1, p=Qvalue_softmax[i])[0]
225 |                 if random.random() <= epsilon and args.epsilon:
226 |                     k = random.randrange(args.num_acts)
227 |                 else:
228 |                     k = Q_choice
229 |             cls = k % args.acts_per_round
230 |             step = k - 1
231 |             if cls >= 2:
232 |                 if i + step >= args.sample_size:
233 |                     r = -max_penalty
234 |                     terminal = True
235 |                 else:
236 |                     r = -args.penalty * (2.0 - (0.5 ** (step - 1)))
237 |                     terminal = False
238 |             else:
239 |                 r = 1 if cls == y else -max_penalty #(-10 if y else -10)
240 |                 terminal = True
241 |             if args.pos_weight > 1:
242 |                 if y:
243 |                     r *= args.pos_weight
244 |                 else:
245 |                     if not y:
246 |                         r /= args.pos_weight
247 |             reward.append(r)
248 |             action.append(k)
249 |             if args.verbose:
250 |                 print i, (a, b), Qvalue[i], k, (y, cls), r
251 |             tbsQvalue = np.zeros(3)
252 |             tbsQvalue[min(2, cls)] = Qvalue[i, k]
253 |             tbs_V.put_board(tbsQvalue, min(2, cls), y, r, epsilon, i + 1, dummy=False)
254 |             if terminal:
255 |                 break
256 |             i += step
257 |         memory.add(dict(cur = cur, reward=reward, action=action, y=y, cnt=1))
258 |         if rand_ep <= e < fix_ep:
259 |             epsilon -= epsilon_shr
260 |             epsilon = max(epsilon, final_epsilon)
261 |         if e * args.num_examples + batch < 50:#args.num_examples / 2:
262 |             continue
263 | 
264 |         replays, idxes, weights = memory.sample(args.batch_size, args.pr_beta)
265 |         new_weights = []
266 |         for b in xrange(args.batch_size):
267 |             cur, reward, action, y = replays[b]['cur'], replays[b]['reward'], replays[b]['action'], replays[b]['y']
268 |             data_batch, delta_sum = agent.wash_data(cur), 0
269 |             Qvalue = agent.get_Qvalue(data_batch, use_target=False, is_train=True)
270 |             grad, r, grad_norm = np.zeros((args.sample_size, args.num_acts)), 0, 0
271 |             t = args.min_imgs
272 |             for i in xrange(len(action) - 1):
273 |                 t += action[i] - 1
274 |             for i in xrange(len(action) - 1, -1, -1):
275 |                 if i < len(action) - 1:
276 |                     r = reward[i] + args.q_gamma * max(last_Q)#min(1.0, max(last_Q))
277 |                 else:
278 |                     r = reward[i]
279 |                 last_Q = Qvalue[t]
280 |                 if args.verbose:
281 |                     print i, t, action[i], y, Qvalue[t], r,
282 |                 delta = -r + Qvalue[t, action[i]]  # TD error: Q(s_t, a_t) minus the target r
283 |                 if not args.total_forward:
284 |                     delta /= len(action)
285 |                 if abs(delta) > 1:
286 |                     delta /= abs(delta)
287 |                 if args.hinge:  # one-sided (hinge) clipping of the TD error
288 |                     if (y and action[i] == 1 or not y and action[i] == 0) and delta > 0:
289 |                         clipped_delta = 0
290 |                     elif (y and action[i] == 0 or not y and action[i] == 1) and delta < 0:
291 |                         clipped_delta = 0
292 |                     else:
293 |                         clipped_delta = delta
294 |                 else:
295 |                     clipped_delta = delta
296 |                 grad[t, action[i]] = clipped_delta
297 |                 grad_norm += (clipped_delta) * (clipped_delta)
298 |                 if args.verbose:
299 |                     print delta, grad[i]
300 |                 delta_sum += abs(delta)
301 |                 if i > 0:
302 |                     t -= (action[i - 1] - 1)
303 |                 if args.total_forward:
304 |                     break
305 |             new_weights.append(1)
306 |             replays[b]['cnt'] += 1
307 |             replays[b]['delta'] = delta
308 |             grad_norm = math.sqrt(grad_norm)
309 |             if args.verbose:
310 |                 print 'grad norm =', grad_norm
311 |             agent.update([mx.nd.array(grad, ctx=devices[0]), mx.nd.zeros(agent.Q.get_outputs()[1].shape, ctx=devices[0])])
312 |         memory.update_priorities(idxes, new_weights)
313 |         if args.verbose:
314 |             print
'gamma =', args.q_gamma, 'epsilon =', epsilon 315 | if (1+batch) % 100 == 0: 316 | tbs_V.print_board() 317 | if (e+1) % 1 == 0: 318 | agent.save(e+1) 319 | -------------------------------------------------------------------------------- /RL/dqn.py: -------------------------------------------------------------------------------- 1 | import sys 2 | #sys.path.insert(0, "mxnet/python/") 3 | import find_mxnet 4 | import mxnet as mx 5 | 6 | from tensorboard import SummaryWriter 7 | import logging 8 | import numpy as np 9 | import argparse 10 | import random 11 | import math 12 | from sklearn.metrics import average_precision_score 13 | 14 | from batch_provider import BatchProvider 15 | from utils import get_imRecordIter 16 | from replay_memory import ReplayMemory 17 | from tb_system import TensorBoardSystem 18 | from agent import Agent 19 | 20 | 21 | def parse_args(): 22 | parser = argparse.ArgumentParser( 23 | description='multishot recog training') 24 | parser.add_argument('--gpus', type=str, default='1', 25 | help='the gpus will be used, e.g "0,1"') 26 | parser.add_argument('--data-dir', type=str, 27 | default="/data3/matt/prid_2011", 28 | help='data directory') 29 | parser.add_argument('--num-examples', type=int, default=10000, 30 | help='the number of training examples') 31 | parser.add_argument('--num-train', type=int, default=20504, 32 | help='the number of training examples') 33 | parser.add_argument('--num-valid', type=int, default=19529, 34 | help='the number of training examples') 35 | parser.add_argument('--num-id', type=int, default=10, 36 | help='the number of training ids') 37 | parser.add_argument('--batch-size', type=int, default=16, 38 | help='the batch size') 39 | parser.add_argument('--sample-size', type=int, default=4, 40 | help='sample frames from each video') 41 | parser.add_argument('--patch-size', type=int, default=1, 42 | help='size of single image patch from video') 43 | parser.add_argument('--lr', type=float, default=1e-2, 44 | help='the initial learning rate') 45 | parser.add_argument('--num-epoches', type=int, default=100, 46 | help='the number of training epochs') 47 | parser.add_argument('--mode', type=str, default='prid_video_match_%d-%d' % (4, 4), 48 | help='save names of model and log') 49 | parser.add_argument('--verifi-threshd', type=float, default=0.9 + 2.3, 50 | help='verification threshold') 51 | parser.add_argument('--kv-store', type=str, 52 | default='device', help='the kvstore type') 53 | parser.add_argument('--network', type=str, 54 | default='inception-bn', help='network name') 55 | parser.add_argument('--model-load-epoch', type=int, default=1, 56 | help='load the model on an epoch using the model-load-prefix') 57 | parser.add_argument('--model-load-prefix', type=str, default='prid_baseline_b4', 58 | help='load model prefix') 59 | parser.add_argument('--q_duel', action='store_true', default=False, 60 | help='if use duel network') 61 | parser.add_argument('--q_double', action='store_true', default=False, 62 | help='if use double DQN') 63 | parser.add_argument('--q-weight', type=float, default=1.0, 64 | help='DQN loss weight') 65 | parser.add_argument('--q-gamma', type=float, default=0.99, 66 | help='DQN decay rate') 67 | parser.add_argument('--penalty', type=float, default=0.1, 68 | help='DQN unsure penalty rate') 69 | parser.add_argument('--ob-epochs', type=int, default=1, 70 | help='DQN observing epochs') 71 | parser.add_argument('--num_acts', type=int, default=3, 72 | help='number of actions') 73 | parser.add_argument('--acts_per_round', type=int, 
default=3, 74 | help='number of actions per round') 75 | parser.add_argument('--fix_gamma', action='store_true', default=False, 76 | help='if fix_gamma in bn') 77 | parser.add_argument('--fix_penalty', action='store_true', default=False, 78 | help='if fix penalty') 79 | parser.add_argument('--no_sim', action='store_true', default=False, 80 | help='if no sim net') 81 | parser.add_argument('--num_hidden', type=int, default=128, 82 | help='number of hidden neurons in Q learning fc layers') 83 | parser.add_argument('--target_freq', type=int, default=500, 84 | help='number of hidden neurons in Q learning fc layers') 85 | parser.add_argument('--tisr', type=int, default=1, 86 | help='time inverse lr step') 87 | parser.add_argument('--num_sim', type=int, default=128, 88 | help='number of hidden neurons in similarity network') 89 | parser.add_argument('--lr_step', type=str, default='50,75', 90 | help='number of epoches to shrink lr') 91 | parser.add_argument('--q_bn', action='store_true', default=False, 92 | help='if add bn in qnet') 93 | parser.add_argument('--maxout', action='store_true', default=False, 94 | help='if add maxout in qnet') 95 | parser.add_argument('--pr_alpha', type=float, default=0.6, 96 | help='prioritized-replay alpha') 97 | parser.add_argument('--pr_beta', type=float, default=0.4, 98 | help='prioritized-replay beta') 99 | parser.add_argument('--add_rewards', action='store_true', default=False, 100 | help='if add rewards for single agent') 101 | parser.add_argument('--epsilon', action='store_true', default=False, 102 | help='if epsilon learning') 103 | parser.add_argument('--pos_weight', type=float, default=1.0, 104 | help='positive rewards weight') 105 | parser.add_argument('--e2e', action='store_true', default=False, 106 | help='if e2e') 107 | parser.add_argument('--history', action='store_true', default=False, 108 | help='if use history') 109 | parser.add_argument('--optimizer', type=str, default='sgd', 110 | help='choose the optimizer in {sgd, adam, rms}') 111 | parser.add_argument('--memory_size', type=int, default=1000, 112 | help='memory buffer size') 113 | parser.add_argument('--final_epsilon', type=float, default=0.1, 114 | help='final epsilon for exploration') 115 | parser.add_argument('--exp_ratio', type=float, default=0.1, 116 | help='ratio for exploration in whole training process') 117 | parser.add_argument('--hinge', action='store_true', default=False, 118 | help='if use hinge loss') 119 | parser.add_argument('--train-set', type=str, default='image_valid', 120 | help='load model prefix') 121 | parser.add_argument('--valid-set', type=str, default='image_test', 122 | help='load model prefix') 123 | parser.add_argument('--min-states', type=int, default=4, 124 | help='minimum states for history') 125 | parser.add_argument('--min-imgs', type=int, default=0, 126 | help='minimum imgs for each state') 127 | parser.add_argument('--precomputed', action='store_true', default=False, 128 | help='if feature precomputed') 129 | parser.add_argument('--fusion', action='store_true', default=False, 130 | help='if use data fusion') 131 | parser.add_argument('--total-forward', action='store_true', default=False, 132 | help='if use data fusion') 133 | parser.add_argument('--verbose', action='store_true', default=False, 134 | help='if print debug info') 135 | parser.add_argument('--crossvalid', action='store_true', default=False, 136 | help='if do cross validation') 137 | return parser.parse_args() 138 | 139 | 140 | args = parse_args() 141 | logging.basicConfig(filename='log/%s.log' % 
args.mode, level=logging.DEBUG) 142 | logger = logging.getLogger() 143 | logger.setLevel(logging.DEBUG) 144 | logging.info(args) 145 | logdir = './tblog/' + args.mode 146 | print args 147 | batch_size = args.batch_size 148 | num_epoch = args.num_epoches 149 | devices = [mx.gpu(int(i)) for i in args.gpus.split(',')] 150 | lr = args.lr 151 | 152 | agent = Agent(args, devices[0]) 153 | 154 | prefix = 'models/%s' % args.mode 155 | if args.crossvalid: 156 | summary_writer = SummaryWriter(logdir) 157 | tbs_V = TensorBoardSystem('V', summary_writer) 158 | 159 | valid_iter = get_imRecordIter( 160 | args, 'recs/%s'%args.valid_set, (3, 224, 112), 1, 161 | shuffle=False, aug=False, even_iter=True) 162 | train_iter = get_imRecordIter( 163 | args, 'recs/%s'%args.train_set, (3, 224, 112), 1, 164 | shuffle=False, aug=True, even_iter=True) 165 | 166 | valid_lst = np.loadtxt('%s/recs/%s.txt'%(args.data_dir, args.valid_set)).astype(int) 167 | train_lst = np.loadtxt('%s/recs/%s.txt'%(args.data_dir, args.train_set)).astype(int) 168 | 169 | valid = BatchProvider(valid_iter, valid_lst, False, args.sample_size, sample_ratio=0.5, is_valid=True, need_feat=args.history) 170 | train = BatchProvider(train_iter, train_lst, True, args.sample_size, sample_ratio=0.5, need_feat=args.history) 171 | N = args.num_id 172 | 173 | cmcs, ap, cmcn, vscores, vturns = [[], [], [], []], [], [1, 5, 10, 20], [], [] 174 | 175 | iterations = args.num_examples 176 | memory = ReplayMemory(replay_size=args.memory_size, alpha=args.pr_alpha) 177 | epsilon = 1.0 178 | final_epsilon = args.final_epsilon 179 | rand_ep, fix_ep = 0, int(args.num_epoches * args.exp_ratio) 180 | epsilon_shr = (epsilon - final_epsilon) / (fix_ep - rand_ep) / iterations 181 | 182 | max_penalty = 1 183 | 184 | frf = open(('figurelog/%s' % args.mode), 'w') 185 | 186 | for e in xrange(args.num_epoches): 187 | if args.verbose: 188 | print 'Epoch', e 189 | for batch in xrange(iterations): 190 | if args.verbose: 191 | print 'Epoch', e, 'batch', batch 192 | cur, a, b, cur_id = train.provide() 193 | y = ((a %N) == (b % N)) 194 | data_batch = agent.wash_data(cur) 195 | Qvalue = agent.get_Qvalue(data_batch, is_train=False) 196 | if args.verbose: 197 | print 'forward', Qvalue 198 | qs = agent.Q.get_outputs()[1].asnumpy() 199 | print qs 200 | print qs.max(), qs.min(), qs.mean(), qs.std() 201 | Qvalue_softmax = mx.nd.SoftmaxActivation(mx.nd.array(Qvalue, ctx=devices[0]) / epsilon / 5).asnumpy() 202 | reward, action, i = [0 for _ in xrange(args.min_imgs)], [-1 for _ in xrange(args.min_imgs)], args.min_imgs 203 | while i < args.sample_size: 204 | if args.total_forward: 205 | if i + 1 < args.sample_size: 206 | k = 2 207 | else: 208 | Q_choice = np.argmax(Qvalue[i, :2]) if args.epsilon else np.random.choice(args.num_acts, 1, p=Qvalue_softmax[i, :2])[0] 209 | if random.random() <= epsilon and args.epsilon: 210 | k = random.randrange(2) 211 | else: 212 | k = Q_choice 213 | else: 214 | Q_choice = np.argmax(Qvalue[i]) if args.epsilon else np.random.choice(args.num_acts, 1, p=Qvalue_softmax[i])[0] 215 | if random.random() <= epsilon and args.epsilon: 216 | k = random.randrange(args.num_acts) 217 | else: 218 | k = Q_choice 219 | cls = k % args.acts_per_round 220 | step = k - 1 221 | if cls >= 2: 222 | if i + step >= args.sample_size: 223 | r = -max_penalty 224 | terminal = True 225 | else: 226 | #r = -args.penalty * step #- step_penalty_rate / (args.sample_size - t[i]) 227 | r = -args.penalty * (2.0 - (0.5 ** (step - 1))) 228 | #if not args.fix_penalty else 1.0 - 0.5 * cnt 229 | terminal = 
False 230 | else: 231 | r = 1 if cls == y else -max_penalty #(-10 if y else -10) 232 | terminal = True 233 | if args.pos_weight > 1: 234 | if y: 235 | r *= args.pos_weight 236 | else: 237 | if not y: 238 | r /= args.pos_weight 239 | reward.append(r) 240 | action.append(k) 241 | if args.verbose: 242 | print i, (a, b), Qvalue[i], k, (y, cls), r 243 | if terminal: 244 | break 245 | i += step 246 | #memory.add(dict(cur = cur_id, reward=reward, action=action, y=y, cnt=1)) 247 | memory.add(dict(cur = cur, reward=reward, action=action, y=y, cnt=1)) 248 | if rand_ep <= e < fix_ep: 249 | epsilon -= epsilon_shr 250 | epsilon = max(epsilon, final_epsilon) 251 | if e * args.num_examples + batch < 50:#args.num_examples / 2: 252 | continue 253 | 254 | replays, idxes, weights = memory.sample(args.batch_size, args.pr_beta) 255 | new_weights = [] 256 | for b in xrange(args.batch_size): 257 | cur, reward, action, y = replays[b]['cur'], replays[b]['reward'], replays[b]['action'], replays[b]['y'] 258 | data_batch, delta_sum = agent.wash_data(cur), 0 259 | Qvalue = agent.get_Qvalue(data_batch, is_train=True) 260 | grad, r, grad_norm = np.zeros((args.sample_size, args.num_acts)), 0, 0 261 | t = args.min_imgs 262 | for i in xrange(len(action) - 1): 263 | t += action[i] - 1 264 | for i in xrange(len(action) - 1, -1, -1): 265 | if i < len(action) - 1: 266 | r = reward[i] + args.q_gamma * max(last_Q)#min(1.0, max(last_Q)) 267 | else: 268 | r = reward[i] 269 | #last_Q_, last_Q = Qvalue_[t], Qvalue[t] 270 | last_Q = Qvalue[t] 271 | if args.verbose: 272 | print i, t, action[i], y, Qvalue[t], r, 273 | delta = -r + Qvalue[t, action[i]] 274 | if not args.total_forward: 275 | delta /= len(action) 276 | if abs(delta) > 1: 277 | delta /= abs(delta) 278 | if args.hinge: 279 | if (y and action == 1 or not y and action == 0) and delta > 0: 280 | clipped_delta = 0 281 | elif (y and action == 0 or not y and action == 1) and delta < 0: 282 | clipped_delta = 0 283 | else: 284 | clipped_delta = delta 285 | else: 286 | clipped_delta = delta 287 | #if abs(clipped_delta) > 1: 288 | # clipped_delta /= abs(clipped_delta) 289 | grad[t, action[i]] = clipped_delta 290 | grad_norm += (clipped_delta) * (clipped_delta) 291 | if args.verbose: 292 | print delta, grad[i] 293 | delta_sum += abs(delta) 294 | if i > 0: 295 | t -= (action[i - 1] - 1) 296 | if args.total_forward: 297 | break 298 | new_weights.append(1) 299 | replays[b]['cnt'] += 1 300 | replays[b]['delta'] = delta 301 | grad_norm = math.sqrt(grad_norm) 302 | if args.verbose: 303 | print 'grad norm =', grad_norm 304 | agent.update([mx.nd.array(grad, ctx=devices[0]), mx.nd.zeros(agent.Q.get_outputs()[1].shape, ctx=devices[0])]) 305 | memory.update_priorities(idxes, new_weights) 306 | if args.verbose: 307 | print 'gamma =', args.q_gamma, 'epsilon =', epsilon 308 | #args.q_gamma = min(0.99, 0.99 * args.q_gamma + 0.01) 309 | 310 | if (e+1) % 1 == 0: 311 | agent.save(e+1) 312 | 313 | if not args.crossvalid: 314 | continue 315 | 316 | valid.reset() 317 | batch, valid_cnt, vv, vpool = 0, 0, np.zeros((N*2, N)), set() 318 | vs, vt = [0 for i in xrange(N+N)], [0 for i in xrange(N+N)] 319 | fts = [[0 for _2 in xrange(N)] for _1 in xrange(N)] 320 | while valid_cnt < N*N: 321 | if args.verbose: 322 | print 'Epoch', e, 'valid', batch, 'vc', valid_cnt 323 | batch += 1 324 | cur, a, b = valid.provide() 325 | y = ((a %N) == (b % N)) 326 | data_batch = agent.wash_data(cur) 327 | Qvalue = agent.get_Qvalue(data_batch, is_train=False) 328 | if args.verbose: 329 | print Qvalue 330 | print 
agent.Q.get_outputs()[1].asnumpy().max(), agent.Q.get_outputs()[1].asnumpy().min() 331 | i = 0 332 | while i < args.sample_size: 333 | if args.total_forward: 334 | if i + 1 < args.sample_size: 335 | k = 2 336 | else: 337 | k = np.argmax(Qvalue[i, :2]) 338 | else: 339 | k = np.argmax(Qvalue[i]) 340 | cls = k % args.acts_per_round 341 | step = k - 1 342 | if cls >= 2: 343 | if i + step >= args.sample_size: 344 | r = -max_penalty 345 | terminal = True 346 | else: 347 | r = -args.penalty * (2.0 - (0.5 ** (step - 1))) 348 | terminal = False 349 | else: 350 | r = 1 if cls == y else -max_penalty #(-10 if y else -10) 351 | terminal = True 352 | if args.pos_weight > 1: 353 | if y: 354 | r *= args.pos_weight 355 | else: 356 | if not y: 357 | r /= args.pos_weight 358 | if args.verbose: 359 | print 'valid', i, (a, b), Qvalue[i], k, (y, cls), r 360 | va, vb = a, b % N 361 | if (va, vb) not in vpool: 362 | vs[va] += r 363 | vs[vb+N] += r 364 | if terminal: 365 | if (va, vb) not in vpool:#vv[va][vb] == 0: 366 | fts[va][vb] = (k + (3 if va == vb else 0), i) 367 | vpool.add((va, vb)) 368 | valid_cnt += 1 369 | vv[va][vb] = Qvalue[i][0] - Qvalue[i][1] 370 | vt[va] += i + 1 371 | vv[vb+N][va] += vv[va][vb] 372 | vt[vb+N] += i + 1 373 | if args.verbose: 374 | print va, vb, vv[va][vb], r 375 | tbsQvalue = np.zeros(3) 376 | tbsQvalue[min(2, cls)] = Qvalue[i, k] 377 | tbs_V.put_board(tbsQvalue, min(2, cls), y, r, epsilon, i + 1, dummy=False) 378 | if terminal: 379 | break 380 | i += step 381 | if valid_cnt % 100 == 0: 382 | tbs_V.print_board() 383 | for i in xrange(N*2): 384 | a, r = i % N, 0 385 | for b in xrange(N): 386 | if a != b and vv[i][b] <= vv[i][a]: 387 | r += 1 388 | for k in xrange(4): 389 | cmcs[k].append(1.0 if r < cmcn[k] else 0.0) 390 | if len(cmcs[k]) >= N*4: 391 | summary_writer.add_scalar(args.mode[0] + ('_CMC%d'%cmcn[k]), sum(cmcs[k][-N*2:]) / (N*2), len(cmcs[k]) - N*2) 392 | vscores += [vs[i]] 393 | vturns += [vt[i]] 394 | score = np.array([-vv[i][_] for _ in xrange(N)]) 395 | label = np.array([(1 if _ == a else 0) for _ in xrange(N)]) 396 | ap.append(average_precision_score(label, score)) 397 | if args.verbose: 398 | print 'ap', i, ap[-1] 399 | if len(ap) >= N*4: 400 | summary_writer.add_scalar(args.mode[0] + '_MAP', sum(ap[-N*2:]) / (N*2), len(ap) - N*2) 401 | if len(ap) >= N*4: 402 | summary_writer.add_scalar(args.mode[0] + ('_scores'), sum(vscores[-N*2:]) / (N*2), len(vscores) - N*2) 403 | summary_writer.add_scalar(args.mode[0] + ('_turns'), sum(vturns[-N*2:]) / (N*2), len(vturns) - N*2) 404 | frf.write('%d %.3f %.3f %.3f %.3f %.3f %.3f\n'%(e, sum(cmcs[0][-N*2:]) / (N*2), sum(cmcs[1][-N*2:]) / (N*2), sum(cmcs[2][-N*2:]) / (N*2), sum(cmcs[3][-N*2:]) / (N*2), sum(ap[-N*2:]) / (N*2), sum(vturns[-N*2:])*1.0 / (N*N*2))) 405 | cnt_map = [[0 for j in xrange(6)] for i in xrange(args.sample_size)] 406 | for i in xrange(N): 407 | for j in xrange(N): 408 | cnt_map[fts[i][j][1]][fts[i][j][0]] += 1 409 | for i in xrange(args.sample_size): 410 | for j in xrange(6): 411 | frf.write(str(cnt_map[i][j]) + ' ') 412 | frf.write('\n') 413 | frf.flush() 414 | 415 | frf.close() 416 | --------------------------------------------------------------------------------
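
The two training scripts above (dqn_mars.py and dqn.py) share the same update rule. As a reading aid, here is a minimal NumPy sketch of that rule under stated assumptions: epsilon-greedy action selection with a linearly annealed epsilon, a one-step TD target r + q_gamma * max_a' Q(s', a'), and a clipped TD error written only into the gradient slot of the chosen action. The names anneal_step, select_action and td_grad are illustrative and do not appear in the repository.

import numpy as np

def anneal_step(epsilon, final_epsilon, num_epoches, exp_ratio, iterations):
    # per-iteration decrement so epsilon reaches final_epsilon after
    # exp_ratio * num_epoches epochs (the epsilon_shr quantity in the scripts)
    fix_ep = int(num_epoches * exp_ratio)
    return (epsilon - final_epsilon) / fix_ep / iterations

def select_action(q_row, epsilon, num_acts):
    # epsilon-greedy: explore with probability epsilon, otherwise act greedily
    if np.random.rand() <= epsilon:
        return np.random.randint(num_acts)
    return int(np.argmax(q_row))

def td_grad(q_values, t, a, r, q_next, gamma, terminal=False, clip=1.0):
    # one-step TD target and clipped TD error for state index t and action a;
    # only the (t, a) entry of the returned gradient is non-zero
    target = r if terminal else r + gamma * np.max(q_next)
    delta = float(np.clip(q_values[t, a] - target, -clip, clip))
    grad = np.zeros_like(q_values)
    grad[t, a] = delta
    return grad

In the scripts the clipping is applied to each per-step delta before it is written into grad, which keeps a single bad transition from dominating one update.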
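
The cross-validation block in dqn.py turns the per-pair decisions into a score matrix and reports CMC and mAP. Below is a hedged sketch of that evaluation, assuming a similarity matrix in which larger values mean more similar and the diagonal identity is the true match; dqn.py itself accumulates a distance-like vv matrix and negates it before calling average_precision_score. cmc_and_map is a hypothetical helper, not part of the repository.

import numpy as np
from sklearn.metrics import average_precision_score

def cmc_and_map(scores, ranks=(1, 5, 10, 20)):
    # scores[q, g]: similarity between query q and gallery g (larger = more similar);
    # gallery index g == q is taken to be the true match for query q
    n = scores.shape[0]
    cmc = dict((k, 0.0) for k in ranks)
    aps = []
    for q in range(n):
        order = np.argsort(-scores[q])           # most similar gallery entry first
        rank = int(np.where(order == q)[0][0])   # position of the true match
        for k in ranks:
            if rank < k:
                cmc[k] += 1.0
        labels = np.zeros(n)
        labels[q] = 1
        aps.append(average_precision_score(labels, scores[q]))
    return dict((k, v / n) for k, v in cmc.items()), float(np.mean(aps))

Under this convention, rank-1 CMC is simply the top-1 accuracy of the nearest gallery entry.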