├── RL
├── find_mxnet.py
├── run.sh
├── symbol_alexnet.py
├── batch_provider_mars.py
├── replay_memory.py
├── batch_provider.py
├── utils.py
├── tb_system.py
├── base_module.py
├── agent.py
├── segment_tree.py
├── rnn_models.py
├── symbol_inception-bn.py
├── mars_test.py
├── img_lib.py
├── mars_test_baseline.py
├── symbols.py
├── find_eg.py
├── dqn_mars.py
└── dqn.py
├── baseline
├── find_mxnet.py
├── run.sh
├── symbol_alexnet.py
├── verifi_iterator.py
├── utils.py
├── preprocess_mars_image.py
├── extract.py
├── preprocess_ilds_image.py
├── preprocess_prid_image.py
├── symbol_inception-bn.py
├── even_iterator.py
├── baseline_test.py
├── calc_cmc.py
├── baseline.py
├── lsoftmax.py
└── loss_layers.py
└── README.md
/RL/find_mxnet.py:
--------------------------------------------------------------------------------
# Import mxnet, falling back to a local source checkout when the package
# is not installed system-wide.
try:
    import mxnet as mx
except ImportError:
    import os, sys
    #curr_path = os.path.abspath(os.path.dirname(__file__))
    #sys.path.append(os.path.join(curr_path, "../../python"))
    # Hard-coded path to a local mxnet build; adjust for your machine.
    sys.path.append('/home/tina/reid/mxnet/python')
    import mxnet as mx
--------------------------------------------------------------------------------
/baseline/find_mxnet.py:
--------------------------------------------------------------------------------
# Import mxnet, falling back to a local source checkout when the package
# is not installed system-wide.
try:
    import mxnet as mx
except ImportError:
    import os, sys
    #curr_path = os.path.abspath(os.path.dirname(__file__))
    #sys.path.append(os.path.join(curr_path, "../../python"))
    # Hard-coded path to a local mxnet build; adjust for your machine.
    sys.path.append('/home/tina/reid/mxnet/python')
    import mxnet as mx
--------------------------------------------------------------------------------
/RL/run.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Second (RL) stage launcher for multi-shot re-identification.
# Usage: sh run.sh <gpus> <unsure-penalty> <dataset> <network> <rec-folder>

sets=0
gpus=$1        # comma-separated GPU ids
up=$2          # penalty for the "unsure" action
data_dir=$5    # folder holding the .lst/.rec files
# $3 selects the dataset; $4 (network name) feeds the model prefix.
case $3 in
iLiDS-VID)
main=dqn.py
base=ilds_$4_$sets
num_id=150
train_set=image_valid$sets
valid_set=image_test$sets
;;
PRID-2011)
main=dqn.py
base=prid_$4_$sets
num_id=100
train_set=image_valid$sets
valid_set=image_test$sets
;;
MARS)
main=dqn_mars.py
base=mars_$4
num_id=624
train_set=image_valid
valid_set=image_test
;;
*)
echo "No valid dataset"
exit
;;
esac

# Hyper-parameters shared by all datasets.
bs=8            # batch size
ss=8            # sample size
ms=$ss          # minimum states (passed to --min-states)
lr=1e-4
epochs=100
ts=$(date "+%Y.%m.%d-%H.%M.%S")
qg=0.9          # Q-learning gamma (discount)
nh=128          # hidden units (--num_hidden)
ns=32           # --num_sim value
mode=DQN_test-$sets-$ts-bs$bs-ss$ss-$4

python $main --gpus $gpus --data-dir $data_dir \
--num-examples 100000 --num-id $num_id \
--train-set $train_set --valid-set $valid_set \
--sample-size $ss --batch-size $bs \
--lr $lr --num-epoches $epochs --mode $3-TEST-$mode \
--model-load-epoch 1 --model-load-prefix $base --q-gamma $qg \
--penalty $up --num_hidden $nh --num_sim $ns \
--min-states $ms --optimizer sgd \
--epsilon --e2e --fusion
--------------------------------------------------------------------------------
/baseline/run.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# First (image-based baseline) stage launcher for re-identification.
# Usage: sh run.sh <gpus> <dataset> <network> <rec-folder>

bs=8
epochs=1
sets=0
gpus=$1
data_dir=$4
# $2 selects the dataset; $3 (network name) feeds the model prefix.
case $2 in
iLiDS-VID)
base=ilds_$3_$sets
num_id=150
train_set=image_valid$sets
valid_set=image_test$sets
;;
PRID-2011)
base=prid_$3_$sets
num_id=100
train_set=image_valid$sets
valid_set=image_test$sets
;;
MARS)
base=mars_$3
num_id=624
train_set=image_valid
valid_set=image_test
;;
*)
echo "No valid dataset"
exit
;;
esac

# BUGFIX: $mode was referenced below but never defined, so --mode always
# received an empty value. Build a descriptive run tag here.
ts=$(date "+%Y.%m.%d-%H.%M.%S")
mode=baseline-$2-$3-$sets-$ts

case $3 in
alexnet)
python baseline.py --gpus $gpus --data-dir $data_dir \
--num-id $num_id --batch-size $bs \
--train-set $train_set --valid-set $valid_set \
--lr 1e-4 --num-epoches $epochs --mode $mode \
--network alexnet --model-load-prefix alexnet --model-load-epoch 1
;;
inception-bn)
python baseline.py --gpus $gpus --data-dir $data_dir \
--num-id $num_id --batch-size $bs \
--train-set $train_set --valid-set $valid_set \
--lr 1e-2 --num-epoches $epochs --mode $mode --lsoftmax
;;
*)
echo "No valid basenet"
exit
;;
esac
--------------------------------------------------------------------------------
/baseline/symbol_alexnet.py:
--------------------------------------------------------------------------------
1 | import find_mxnet
2 | import mxnet as mx
3 | import numpy as np
4 |
5 |
def get_symbol(params=None):
    """Build the AlexNet convolutional trunk and return its output symbol.

    params: optional dict mapping conv weight/bias names to symbols so the
    trunk can share weights with another network; fresh Variable
    placeholders are created when None.
    """
    if params is None:
        names = ['conv1_weight', 'conv1_bias', 'conv2_weight', 'conv2_bias',
                 'conv3_weight', 'conv3_bias', 'conv4_weight', 'conv4_bias',
                 'conv5_weight', 'conv5_bias']
        params = {n: mx.sym.Variable(n) for n in names}

    # input
    net = mx.symbol.Variable(name="data")

    # conv1 block: conv -> relu -> LRN -> max-pool
    net = mx.sym.Convolution(data=net, kernel=(11, 11), stride=(4, 4), num_filter=96,
                             weight=params['conv1_weight'], bias=params['conv1_bias'])
    net = mx.sym.Activation(data=net, act_type='relu')
    net = mx.sym.LRN(data=net, alpha=0.0001, beta=0.75, knorm=2, nsize=5)
    net = mx.sym.Pooling(data=net, pool_type='max', kernel=(3, 3), stride=(2, 2))

    # conv2 block (grouped convolution, as in the original AlexNet)
    net = mx.sym.Convolution(data=net, kernel=(5, 5), pad=(2, 2), num_filter=256, num_group=2,
                             weight=params['conv2_weight'], bias=params['conv2_bias'])
    net = mx.sym.Activation(data=net, act_type='relu')
    net = mx.sym.LRN(data=net, alpha=0.0001, beta=0.75, knorm=2, nsize=5)
    net = mx.sym.Pooling(data=net, kernel=(3, 3), stride=(2, 2), pool_type='max')

    # conv3
    net = mx.sym.Convolution(data=net, kernel=(3, 3), pad=(1, 1), num_filter=384, num_group=1,
                             weight=params['conv3_weight'], bias=params['conv3_bias'])
    net = mx.sym.Activation(data=net, act_type='relu')

    # conv4
    net = mx.sym.Convolution(data=net, kernel=(3, 3), pad=(1, 1), num_filter=384, num_group=2,
                             weight=params['conv4_weight'], bias=params['conv4_bias'])
    net = mx.sym.Activation(data=net, act_type='relu')

    # conv5 block: conv -> relu -> max-pool
    net = mx.sym.Convolution(data=net, kernel=(3, 3), pad=(1, 1), num_filter=256, num_group=2,
                             weight=params['conv5_weight'], bias=params['conv5_bias'])
    net = mx.sym.Activation(data=net, act_type='relu')
    net = mx.sym.Pooling(data=net, kernel=(3, 3), stride=(2, 2), pool_type='max')

    return net
--------------------------------------------------------------------------------
/RL/symbol_alexnet.py:
--------------------------------------------------------------------------------
1 | import find_mxnet
2 | import mxnet as mx
3 | import numpy as np
4 |
5 |
def get_symbol(data=None, params=None, fix_gamma=None, global_stats=None):
    """AlexNet trunk for the RL stage.

    data: optional input symbol (a fresh 'data' Variable when None).
    params: optional dict of conv weight/bias symbols; created when None.
    fix_gamma / global_stats are accepted for interface compatibility but
    unused here (this trunk has no BatchNorm layers).
    """
    if params is None:
        names = ['conv1_weight', 'conv1_bias', 'conv2_weight', 'conv2_bias',
                 'conv3_weight', 'conv3_bias', 'conv4_weight', 'conv4_bias',
                 'conv5_weight', 'conv5_bias']
        params = {n: mx.sym.Variable(n) for n in names}

    # input
    net = mx.symbol.Variable(name="data") if data is None else data

    # conv1 block: conv -> relu -> LRN -> max-pool
    net = mx.sym.Convolution(data=net, kernel=(11, 11), stride=(4, 4), num_filter=96,
                             weight=params['conv1_weight'], bias=params['conv1_bias'])
    net = mx.sym.Activation(data=net, act_type='relu')
    net = mx.sym.LRN(data=net, alpha=0.0001, beta=0.75, knorm=2, nsize=5)
    net = mx.sym.Pooling(data=net, pool_type='max', kernel=(3, 3), stride=(2, 2))

    # conv2 block; BlockGrad stops gradients below this point so the early
    # layers stay frozen during RL fine-tuning.
    net = mx.sym.Convolution(data=net, kernel=(5, 5), pad=(2, 2), num_filter=256, num_group=2,
                             weight=params['conv2_weight'], bias=params['conv2_bias'])
    net = mx.sym.Activation(data=net, act_type='relu')
    net = mx.sym.LRN(data=net, alpha=0.0001, beta=0.75, knorm=2, nsize=5)
    net = mx.sym.Pooling(data=net, kernel=(3, 3), stride=(2, 2), pool_type='max')
    net = mx.symbol.BlockGrad(net, name='block_conv2')

    # conv3
    net = mx.sym.Convolution(data=net, kernel=(3, 3), pad=(1, 1), num_filter=384, num_group=1,
                             weight=params['conv3_weight'], bias=params['conv3_bias'])
    net = mx.sym.Activation(data=net, act_type='relu')

    # conv4
    net = mx.sym.Convolution(data=net, kernel=(3, 3), pad=(1, 1), num_filter=384, num_group=2,
                             weight=params['conv4_weight'], bias=params['conv4_bias'])
    net = mx.sym.Activation(data=net, act_type='relu')

    # conv5 block: conv -> relu -> max-pool
    net = mx.sym.Convolution(data=net, kernel=(3, 3), pad=(1, 1), num_filter=256, num_group=2,
                             weight=params['conv5_weight'], bias=params['conv5_bias'])
    net = mx.sym.Activation(data=net, act_type='relu')
    net = mx.sym.Pooling(data=net, kernel=(3, 3), stride=(2, 2), pool_type='max')

    return net
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | * [Multi-shot Re-identification](#1)
2 | * [Preparations](#1.1)
3 | * [Usage](#1.2)
4 |
5 |
Multi-shot Re-identification Based on Reinforcement Learning
6 |
7 | ---
8 |
9 | Training and testing code for multi-shot re-identification. Currently, this code is tested on the PRID-2011, iLiDS-VID and MARS datasets. For algorithm details and experimental results, please refer to our paper: [Multi-shot Pedestrian Re-identification via Sequential Decision Making](https://arxiv.org/abs/1712.07257)
10 |
11 | Preparations
12 |
13 | ---
14 |
15 | Before starting running this code, you should make the following preparations:
16 |
17 | * Download the [MARS](http://www.liangzheng.com.cn/Project/project_mars.html)
18 | , [iLIDS-VID](http://www.eecs.qmul.ac.uk/~xiatian/downloads_qmul_iLIDS-VID_ReID_dataset.html) and [PRID-2011](https://www.tugraz.at/institute/icg/research/team-bischof/lrs/downloads/PRID11/).
19 | * Install MXNet following the [instructions](http://mxnet.io/get_started/index.html#setup-and-installation) and install the python interface. Currently the repo is tested on commit e06c55.
20 |
21 | Usage
22 |
23 | ---
24 |
25 | * Download the datasets and unzip.
26 | * Prepare the data files. Generate the image list files according to `preprocess_ilds_image.py`
27 | , `preprocess_prid_image.py` and `preprocess_mars_image.py` under the `baseline` folder.
28 | * The code is split into two stages. The first stage is an image-based re-id task;
29 | please refer to the script `run.sh` in the `baseline` folder. The code for this stage is based on [this repo](https://github.com/TuSimple/re-identification). The usage is:
30 | ```shell
31 | sh run.sh $gpu $dataset $network $recfolder
32 | ```
33 | e.g. If you want to train MARS dataset on gpu 0 using inception-bn, please run:
34 | ```shell
35 | sh run.sh 0 MARS inception-bn /data3/matt/MARS/recs
36 | ```
37 | * The second stage is a multi-shot re-id task based on reinforcement learning.
38 | Please refer to the script `run.sh` in the `RL` folder. The usage is:
39 | ```shell
40 | sh run.sh $gpu $unsure-penalty $dataset $network $recfolder
41 | ```
42 | * For evaluation, please use `baseline/baseline_test.py` and `RL/find_eg.py`. In `RL/find_eg.py`, we also show some example episodes with good quality generated by our algorithm.
43 |
44 |
45 |
--------------------------------------------------------------------------------
/RL/batch_provider_mars.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import random
3 |
4 |
def get_data(src):
    """Return the first data array of src's next batch, restarting the
    iterator once if it is exhausted."""
    try:
        nxt = src.next()
    except StopIteration:
        # End of epoch: rewind and take the first batch instead.
        src.reset()
        nxt = src.next()
    return nxt.data[0]
12 |
13 |
class BatchProvider:
    """Samples image pairs for the MARS RL stage.

    F is indexed as F[person][camera][tracklet]; the innermost entry is a
    data iterator consumed via get_data(). Each provide() call picks two
    tracklets — same person with probability `sample_ratio`, otherwise two
    different people — and fills `size` frames from each.
    """
    def __init__(self, F, is_train, size, sample_ratio=0.5, need_feat=False, start=None, end=None, is_valid=False, agent=None):
        self.F = F
        self.is_train = is_train
        self.size = size
        self.N = len(F)
        self.sample_ratio = sample_ratio
        self.need_feat = need_feat
        self.valid = is_valid
        self.cnt = 0
        self.agent = agent
        # Restrict identity sampling to [start, end) when given.
        if start is not None:
            self.start, self.end = start, end
        else:
            self.start, self.end = 0, self.N
        self.reset()

    def reset(self):
        # Clear episode state; all indices stay -1 until the first provide().
        if self.valid:
            self.cnt = 0
        self.terminal = [True for _ in xrange(self.size)]
        self.A = -1
        self.B = -1
        self.cA = -1
        self.cB = -1
        self.tA = -1
        self.tB = -1
        # Frame buffers in NCHW layout (3x224x112 crops).
        self.curA = np.zeros((self.size, 3, 224, 112))
        self.curB = np.zeros((self.size, 3, 224, 112))

    def get_img(self, F, aug=False):
        # Pull the next frame from a per-tracklet iterator (aug is unused here).
        return get_data(F)[0].asnumpy()

    def provide(self, preload=None):
        """Return ([A-frames, B-frames], person_A, person_B) for one episode."""
        # Positive pair (same person) with probability sample_ratio.
        if random.random() < self.sample_ratio:
            a = b = (np.random.choice(self.end-self.start, 1) + self.start)[0]
            # A positive pair needs at least two cameras for this person.
            while len(self.F[a]) < 2:
                a = b = np.random.choice(self.N, 1)[0]
        else:
            a, b = (np.random.choice(self.end-self.start, 2, replace=False)+self.start)
        self.A, self.B = a, b
        # Choose cameras: must be distinct when the identities coincide.
        if not a == b:
            self.cA, self.cB = np.random.choice(len(self.F[a]), 1)[0], np.random.choice(len(self.F[b]), 1)[0]
        else:
            self.cA, self.cB = np.random.choice(len(self.F[a]), 2, replace=False)
        # Then choose one tracklet within each chosen camera.
        self.tA, self.tB = np.random.choice(len(self.F[a][self.cA]), 1)[0], np.random.choice(len(self.F[b][self.cB]), 1)[0]
        print self.A, self.cA, self.tA
        for i in xrange(self.size):
            self.curA[i] = self.get_img(self.F[self.A][self.cA][self.tA], True)#self.A%self.N==self.B%self.N)
            self.curB[i] = self.get_img(self.F[self.B][self.cB][self.tB], True)#self.A%self.N==self.B%self.N)

        cur = [np.array(self.curA), np.array(self.curB)]
        return cur, self.A, self.B
--------------------------------------------------------------------------------
/RL/replay_memory.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import random
3 | from segment_tree import SumSegmentTree, MinSegmentTree
4 | from utils import copyto
5 |
class ReplayMemory:
    """Prioritized experience replay buffer (proportional variant).

    Priorities are kept in sum/min segment trees so both sampling and the
    importance-weight normalisation are O(log n) per item.
    """
    def __init__(self, replay_size, alpha=0.6):
        # alpha controls how strongly priorities skew sampling (0 = uniform).
        self.replay_size = replay_size
        self.cnt = 0
        self._alpha = alpha
        # Segment trees require a power-of-two capacity.
        it_capacity = 1
        while it_capacity < replay_size:
            it_capacity *= 2

        self._it_sum = SumSegmentTree(it_capacity)
        self._it_min = MinSegmentTree(it_capacity)
        self._max_priority = 1.0
        self._storage = []
        self._maxsize = replay_size
        self._next_idx = 0

    def add(self, data):
        """Store one transition with maximal priority (sampled soon)."""
        #new_data = []
        #for i in data:
        #    i.wait_to_read()
        #    new_data.append(copyto(i))
        # BUGFIX: capture the slot index *before* advancing _next_idx.
        # Previously idx was read after the increment, so the priority was
        # written to the slot of the NEXT insertion, not the one just stored.
        idx = self._next_idx
        if idx >= len(self._storage):
            self._storage.append(data)
        else:
            self._storage[idx] = data
        self._next_idx = (self._next_idx + 1) % self._maxsize
        self._it_sum[idx] = self._max_priority ** self._alpha
        self._it_min[idx] = self._max_priority ** self._alpha

    def _sample_proportional(self, batch_size):
        # Draw indices with probability proportional to stored priority mass.
        res = []
        for _ in range(batch_size):
            mass = random.random() * self._it_sum.sum(0, len(self._storage) - 1)
            idx = self._it_sum.find_prefixsum_idx(mass)
            res.append(idx)
        return res

    def sample(self, batch_size, beta):
        """Sample a batch.

        beta anneals the importance-sampling correction (1 = full
        correction). Returns (items, indices, normalised weights).
        """
        assert beta > 0

        idxes = self._sample_proportional(batch_size)

        weights = []
        # Normalise by the maximal possible weight so weights are <= 1.
        p_min = self._it_min.min() / self._it_sum.sum()
        max_weight = (p_min * len(self._storage)) ** (-beta)

        for idx in idxes:
            p_sample = self._it_sum[idx] / self._it_sum.sum()
            weight = (p_sample * len(self._storage)) ** (-beta)
            weights.append(weight / max_weight)
        weights = np.array(weights)
        weights /= np.sum(weights)
        ret = [self._storage[i] for i in idxes]
        return (ret, idxes, weights)

    def update_priorities(self, idxes, priorities):
        """Write back new priorities (e.g. |TD-error|) for sampled items."""
        assert len(idxes) == len(priorities)
        for idx, priority in zip(idxes, priorities):
            assert priority > 0
            assert 0 <= idx < len(self._storage)
            self._it_sum[idx] = priority ** self._alpha
            self._it_min[idx] = priority ** self._alpha

        self._max_priority = max(self._max_priority, priority)
--------------------------------------------------------------------------------
/baseline/verifi_iterator.py:
--------------------------------------------------------------------------------
1 | import mxnet as mx
2 | import numpy as np
3 |
4 |
class verifi_iterator(mx.io.DataIter):
    '''
    Data iterator that merges two source iterators (one fully shuffled,
    one containing pairs) into a single doubled batch.

    Batches are interleaved per-GPU slice so every device sees samples
    from both streams; extra copies of the label are appended for the
    verification / lsoftmax / center heads when enabled.
    '''
    def __init__(self, data_iter1, data_iter2,
                 use_verifi=False, use_center=False, use_lsoftmax=False, gpus=1):
        super(verifi_iterator, self).__init__()
        self.data_iter1 = data_iter1
        self.data_iter2 = data_iter2
        # Combined batch is twice a single source iterator's batch.
        self.batch_size = self.data_iter1.batch_size * 2
        self.gpus = gpus
        self.use_verifi = use_verifi
        self.use_center = use_center
        self.use_lsoftmax = use_lsoftmax
        print "gpus", self.gpus

    @property
    def provide_data(self):
        # Same shape as iterator 1 but with the batch dimension doubled.
        provide_data = self.data_iter1.provide_data[0]
        shape = list(provide_data[1])
        shape[0] *= 2

        return [(provide_data[0], tuple(shape))]

    @property
    def provide_label(self):
        # provide_label = self.data_iter1.provide_label[0][1]
        # Different labels should be used here for actual application
        labels = [('softmax_label', (self.batch_size,))]
        if self.use_lsoftmax:
            labels.append(('lsoftmax_label', (self.batch_size,)))
        if self.use_verifi:
            labels.append(('verifi_label', (self.batch_size,)))
        if self.use_center:
            labels.append(('center_label',(self.batch_size,)))
        return labels

    def hard_reset(self):
        self.data_iter1.hard_reset()
        self.data_iter2.hard_reset()

    def reset(self):
        self.data_iter1.reset()
        self.data_iter2.reset()

    def next(self):
        """Return one combined DataBatch built from both source iterators."""
        batch1 = self.data_iter1.next()
        batch2 = self.data_iter2.next()

        def concat_array(data1, data2, gpus, ndarray=True):
            # Interleave per-GPU slices: [d1_gpu0, d2_gpu0, d1_gpu1, ...]
            # so each device's sub-batch contains both streams.
            n = data2.shape[0]
            k = n / gpus
            data_lst = []
            for i in range(0, n, n / gpus):
                data_lst.append(data1[i:i + k])
                data_lst.append(data2[i:i + k])

            # print data_lst[0].shape, data_lst[1].shape
            data = mx.nd.concatenate(data_lst) if ndarray\
                else np.concatenate(data_lst)

            return data

        data = concat_array(batch1.data[0], batch2.data[0], self.gpus)
        label = concat_array(batch1.label[0], batch2.label[0], self.gpus)
        # Negated index marks samples that came from iterator 2.
        index = concat_array(
            batch1.index, -batch2.index, self.gpus, ndarray=False)

        # Every enabled head receives the same label array.
        labels = [label]
        if self.use_verifi:
            labels.append(label)
        if self.use_lsoftmax:
            labels.append(label)
        if self.use_center:
            labels.append(label)
        # print data.shape
        return mx.io.DataBatch(data=[data],
                               label=labels,
                               pad=batch1.pad + batch2.pad,
                               index=index)
--------------------------------------------------------------------------------
/baseline/utils.py:
--------------------------------------------------------------------------------
1 | import find_mxnet
2 | import mxnet as mx
3 | from even_iterator import Even_iterator
4 |
def load_checkpoint(prefix, epoch):
    """Load '<prefix>-<epoch>.params' and split tensors into arg/aux dicts.

    Names belonging to the metric-learning heads are skipped so the
    checkpoint can be reused as a plain feature extractor.
    """
    # symbol = sym.load('%s-symbol.json' % prefix)
    skip = ('triplet_match', 'triplet', 'lmnn', 'lsoftmax',
            'lsoftmax_weight', 'lsoftmax_label')
    saved = mx.nd.load('%s-%04d.params' % (prefix, epoch))
    arg_params, aux_params = {}, {}
    for key, value in saved.items():
        kind, name = key.split(':', 1)
        if name in skip:
            continue
        if kind == 'arg':
            arg_params[name] = value
        elif kind == 'aux':
            aux_params[name] = value
    return (arg_params, aux_params)
19 |
def copyto(x):
    # Return a copy of x on its own context (a detached duplicate array).
    return x.copyto(x.context)
22 |
def get_imRecordIter(args, name, input_shape, batch_size, kv=None, shuffle=False, aug=False, even_iter=False):
    '''
    Get an iterator using Even_iterator or mx.io.ImageRecordIter.

    args must expose `data_dir`; `name` is the basename of the .lst/.rec
    pair inside it. input_shape is the (C, H, W) data shape; `aug`
    enables random crop/mirror/color augmentation; `even_iter` selects
    the custom pair-preserving Even_iterator. `kv` is currently unused
    (the kvstore sharding args below are commented out).
    '''
    if even_iter:
        # Python-side iterator: resize to 128, subtract mean of 128.
        aug_params = {}
        aug_params['resize'] = 128
        aug_params['rand_crop'] = aug
        aug_params['rand_mirror'] = aug
        aug_params['input_shape'] = input_shape
        aug_params['mean'] = 128.0

        dataiter = Even_iterator(
            '%s/%s.lst' % (args.data_dir, name),
            batch_size=batch_size,
            aug_params=aug_params,
            shuffle=shuffle,
            data_dir = args.data_dir)
    else:
        if aug:
            # Training iterator with mild geometric/photometric jitter.
            dataiter = mx.io.ImageRecordIter(
                path_imglist="%s/%s.lst" % (args.data_dir, name),
                path_imgrec="%s/%s.rec" % (args.data_dir, name),
                # mean_img="models/market_mean.bin",
                mean_r=128.0,
                mean_g=128.0,
                mean_b=128.0,
                rand_crop=True,
                rand_mirror=True,
                max_random_contrast=0.1,
                max_random_illumination=0.1,
                max_aspect_ratio=0.1,
                max_shear_ratio=0.2,
                random_h=10,
                random_s=10,
                random_l=10,
                #max_random_contrast=0.2,
                #max_random_illumination=0.2,
                #max_aspect_ratio=0.2,
                #max_shear_ratio=0.2,
                #random_h=30,
                #random_s=30,
                #random_l=30,
                prefetch_buffer=4,
                preprocess_threads=4,
                shuffle=shuffle,
                label_width=1,
                round_batch=True,
                data_shape=input_shape,
                batch_size=batch_size,)
                #num_parts=kv.num_workers,
                #part_index=kv.rank)
        else:
            # Evaluation iterator: mean subtraction only, no augmentation.
            dataiter = mx.io.ImageRecordIter(
                path_imglist="%s/%s.lst" % (args.data_dir, name),
                path_imgrec="%s/%s.rec" % (args.data_dir, name),
                # mean_img="models/market_mean.bin",
                mean_r=128.0,
                mean_g=128.0,
                mean_b=128.0,
                prefetch_buffer=4,
                preprocess_threads=4,
                shuffle=shuffle,
                label_width=1,
                round_batch=True,
                data_shape=input_shape,
                batch_size=batch_size,)
                #num_parts=kv.num_workers,
                #part_index=kv.rank)

    return dataiter
94 |
--------------------------------------------------------------------------------
/baseline/preprocess_mars_image.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 | import csv
4 | import random
5 | import glob
6 | import numpy as np
7 |
ROOT = '/data3/matt/MARS'                    # dataset root (machine-specific)
output = '/data3/matt/MARS/recs'             # where generated .lst/.rec files go
im2rec = '/home/tina/reid/mxnet/bin/im2rec'  # mxnet record-packing binary
11 |
12 |
def load_split():
    """Split identity ids into train/test by scanning multi_shot folders.

    The first 100 identities present in both cameras go to `train`, the
    next 100 to `test`; scanning stops after 200 usable identities.
    NOTE(review): the layout used here (multi_shot/cam_a, person_%04d,
    .png) looks like the PRID-2011 structure rather than MARS — confirm
    before reusing this helper.
    """
    train, test = [], []
    cnt = 0
    for i in xrange(386):
        cam_a = glob.glob('%s/multi_shot/cam_a/person_%04d/*.png' % (ROOT, i))
        cam_b = glob.glob('%s/multi_shot/cam_b/person_%04d/*.png' % (ROOT, i))
        if len(cam_a) * len(cam_b) > 0:  # identity must appear in both cameras
            cnt += 1
            if cnt > 100:
                test.append(i)
            else:
                train.append(i)
            if cnt >= 200:
                break
    return train, test
28 |
def rnd_pos(N, i):
    """Return a uniformly random index in [0, N) that differs from i.

    Draws one of the N-1 values in [0, N-1) and maps a collision with i
    to N-1 — the single value the draw can never produce — so every
    index other than i is equally likely.
    """
    x = random.randint(0, N - 2)
    # BUGFIX: the collision must map to N-1. The old `x + 1` made N-1
    # unreachable whenever i < N-2 and doubled the probability of i+1.
    return N - 1 if x == i else x
32 |
def save_rec(lst, path, name):
    """Write `lst` as a tab-separated <path>/<name>.lst file and print the
    im2rec packing command.

    Each item in `lst` is an (index, label, image_path) tuple. The .rec
    conversion itself is not executed here; the shell command is printed
    for the user to run.
    """
    lst_file = '%s/%s.lst' % (path, name)
    rec_file = '%s/%s.rec' % (path, name)
    #print lst_file, rec_file, '%s %s %s %s resize=128 quality=90' % (im2rec, lst_file, ROOT, rec_file)
    # BUGFIX: close the list file deterministically. The old
    # csv.writer(open(...)) leaked the handle, so rows could still be
    # buffered (unflushed) when im2rec was later run on the file.
    with open(lst_file, "w") as f:
        fo = csv.writer(f, delimiter='\t', lineterminator='\n')
        for item in lst:
            fo.writerow(item)
    # NOTE(review): echoing a sudo password on the command line is unsafe;
    # kept only because the printed command is part of the expected output.
    print('echo 123456 | sudo -S %s %s %s %s resize=128 quality=90 &' % (im2rec, lst_file, ROOT, rec_file))
    #subprocess.call('%s %s %s %s resize=128 quality=90' % (im2rec, lst_file, ROOT, rec_file))
42 |
def save_train(f, is_valid=False):
    """Emit paired training (or validation) lists for the even/rand iterators.

    f: identities -> cameras -> image paths (as built by gen_train()).
    Each of the 100000 (2000 for validation) iterations samples 96
    distinct identities: 32 become positive pairs (two cameras of the
    same person, written to the '_even' list) while 64 supply matching
    negatives (written to the '_rand' list). Row ids advance in (cnt,
    cnt+1) pairs so the two lists stay aligned after record packing.
    """
    plst, nlst, cnt, N, pool = [], [], 0, len(f), [_ for _ in xrange(len(f))]
    for _ in xrange(100000 if not is_valid else 2000):
        ts = random.sample(pool, 96)
        ns, ps = ts[:64], ts[64:]
        for r in xrange(32):
            i, x, y = ps[r], ns[r + r], ns[r + r + 1]
            # Two different cameras of identity i -> a positive pair.
            p1c = random.randint(0, len(f[i]) - 1)
            p2c = rnd_pos(len(f[i]), p1c)
            p1 = (cnt, i, f[i][p1c][random.randint(0, len(f[i][p1c]) - 1)])
            p2 = (cnt + 1, i, f[i][p2c][random.randint(0, len(f[i][p2c]) - 1)])
            # One random image each from identities x and y -> negatives.
            n1c = random.randint(0, len(f[x]) - 1)
            n2c = random.randint(0, len(f[y]) - 1)
            n1 = (cnt, x, f[x][n1c][random.randint(0, len(f[x][n1c]) - 1)])
            n2 = (cnt + 1, y, f[y][n2c][random.randint(0, len(f[y][n2c]) - 1)])
            cnt += 2
            plst.append(p1)
            plst.append(p2)
            nlst.append(n1)
            nlst.append(n2)
    save_rec(plst, output, 'image_' + ('valid' if is_valid else 'train') + '_even')
    save_rec(nlst, output, 'image_' + ('valid' if is_valid else 'train') + '_rand')
65 |
def gen_train():
    """Build the MARS training pool from bbox_train and emit the lists.

    Groups each identity's images by the two-character code at positions
    4:6 of the file name (presumably the camera id — confirm against the
    MARS naming spec), keeps groups with more than one image, and drops
    identities with fewer than two usable groups.
    """
    pool = []
    for i in xrange(1500):
        images = glob.glob('%s/bbox_train/%04d/*.jpg' % (ROOT, i))
        f = dict()
        for k in images:
            name = k.split('/')[-1]
            ct = name[4:6]  # grouping code embedded in the file name
            if not ct in f:
                f[ct] = []
            f[ct].append(k[len(ROOT):])  # store path relative to ROOT
        g = []
        for x in f:
            if len(f[x]) > 1:  # need at least two images per group
                g.append(f[x])
        if len(g) <= 1:  # need at least two groups per identity
            continue
        pool.append(g)

    save_train(pool)
    save_train(pool, is_valid=True)
87 |
def naive_lst(dataset):
    """Emit an evaluation list for the MARS bbox_<dataset> images.

    Reads image names from the official MARS-evaluation info file; every
    entry gets label 0, since evaluation only needs the image order.
    """
    # BUGFIX: use a context manager so the info file is always closed
    # (the old code leaked the handle).
    lst, cnt = [], 0
    with open('%s/MARS-evaluation/info/%s_name.txt' % (ROOT, dataset)) as lst_file:
        for line in lst_file:
            s = line.strip()
            # First four characters of the name are the identity folder.
            lst.append((cnt, 0, '/bbox_%s/%s/%s' % (dataset, s[:4], s)))
            cnt += 1
    save_rec(lst, output, 'eval_' + dataset)
96 |
if __name__ == '__main__':
    # Uncomment the naive_lst calls to emit the raw evaluation lists instead.
    #naive_lst('train')
    #naive_lst('test')
    gen_train()
--------------------------------------------------------------------------------
/RL/batch_provider.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import random
3 |
class BatchProvider:
    """Samples frame pairs for the RL stage on iLiDS-VID / PRID-2011.

    F is an image library exposing get_single(index, aug); `lst` is a
    cumulative offset array: frames of video i live at indices
    [lst[i], lst[i+1]). Videos 0..N-1 and N..2N-1 are presumably the two
    cameras of the same N identities (video a pairs with a+N) — the
    positive/negative logic below relies on this.
    """
    def __init__(self, F, lst, is_train, size, sample_ratio=0.5, need_feat=False, start=None, end=None, is_valid=False, agent=None):
        self.F = F
        self.lst = lst
        self.is_train = is_train
        self.size = size
        self.N = lst.shape[0] / 2  # number of identities per camera
        self.sample_ratio = sample_ratio
        self.need_feat = need_feat
        self.valid = is_valid
        self.cnt = 0
        self.agent = agent
        # Restrict identity sampling to [start, end) when given.
        if start is not None:
            self.start, self.end = start, end
        else:
            self.start, self.end = 0, self.N
        # vid maps a global frame index back to its video index.
        self.vid = []
        for i in xrange(self.N + self.N):
            for j in xrange(lst[i + 1] - lst[i]):
                self.vid.append(i)
        # One epoch visits every frame twice: once as a positive anchor
        # (flag 1) and once as a negative anchor (flag 0).
        self.epoch_rounds = lst[-1] * 2
        self.first_imgs = [(i, j) for i in xrange(lst[-1]) for j in xrange(2)]
        random.shuffle(self.first_imgs)
        # NOTE(review): vid is rebuilt identically here — redundant.
        self.vid = []
        for i in xrange(self.N + self.N):
            for j in xrange(lst[i + 1] - lst[i]):
                self.vid.append(i)
        self.reset()
        # Per-frame sampling counter (diagnostics).
        self.hit_cnt = np.zeros(self.epoch_rounds / 2)
        # img_rank[i]: shuffled frame indices of video i, used in valid mode.
        self.img_rank = []
        for i in xrange(self.N + self.N):
            g = []
            for j in xrange(lst[i], lst[i + 1]):
                g.append(j)
            random.shuffle(g)
            self.img_rank.append(g)


    def reset(self):
        # Clear episode state; A/B stay -1 until the first provide().
        if self.valid:
            self.cnt = 0
        self.terminal = [True for _ in xrange(self.size)]
        self.A = -1
        self.B = -1
        # Frame buffers in NCHW layout (3x224x112 crops).
        self.curA = np.zeros((self.size, 3, 224, 112))
        self.curB = np.zeros((self.size, 3, 224, 112))


    def get_img(self, i, aug=False):
        # Draw a random frame of video i; returns (image, frame index).
        idx = random.randrange(self.lst[i], self.lst[i + 1])
        self.hit_cnt[idx] += 1
        return self.F.get_single(idx, aug), idx


    def provide(self, preload=None):
        """Fill the frame buffers for one episode.

        Validation: enumerate all (probe, gallery) video pairs in order.
        Training: take the next shuffled anchor frame; its flag chooses a
        positive (other-camera video of the same identity) or a random
        negative partner. With `preload`, replay the given frame indices.
        """
        if preload is None:
            if self.valid:
                # Enumerate pair (A, B): A in camera 1, B in camera 2.
                next = self.cnt
                self.cnt += 1
                self.A = next / self.N
                self.B = next % self.N
                self.B += self.N
                for i in xrange(self.size):
                    self.curA[i] = self.F.get_single(self.img_rank[self.A][i % len(self.img_rank[self.A])])
                    self.curB[i] = self.F.get_single(self.img_rank[self.B][i % len(self.img_rank[self.B])])
            else:
                first_img = self.first_imgs[self.cnt % self.epoch_rounds]
                self.cnt += 1
                a = self.vid[first_img[0]]
                if a < self.N:
                    # Anchor from camera 1; partner comes from camera 2.
                    if first_img[1] == 1:
                        b = a + self.N  # positive: same identity, other camera
                    else:
                        # Negative: random camera-2 video that is not a's pair.
                        b = self.vid[random.randrange(self.lst[self.N], self.lst[self.N+self.N])]
                        while b == a + self.N:
                            b = self.vid[random.randrange(self.lst[self.N], self.lst[self.N+self.N])]
                else:
                    # Anchor from camera 2; partner comes from camera 1.
                    if first_img[1] == 1:
                        b = a - self.N
                    else:
                        b = self.vid[random.randrange(self.lst[0], self.lst[self.N])]
                        while b == a - self.N:
                            b = self.vid[random.randrange(self.lst[0], self.lst[self.N])]
                self.A, self.B = a, b
                idx = []
                for i in xrange(self.size):
                    self.curA[i], ida = self.get_img(self.A, True)#self.A%self.N==self.B%self.N)
                    self.curB[i], idb = self.get_img(self.B, True)#self.A%self.N==self.B%self.N)
                    idx.append((ida, idb))
        else:
            # Replay previously recorded (frame_A, frame_B) index pairs.
            for i in xrange(self.size):
                self.curA[i], self.curB[i] = self.F.get_single(preload[i][0]), self.F.get_single(preload[i][1])

        cur = [np.array(self.curA), np.array(self.curB)]
        if not self.valid:
            if preload is None:
                return cur, self.A, self.B, idx
            else:
                return cur
        return cur, self.A, self.B
--------------------------------------------------------------------------------
/RL/utils.py:
--------------------------------------------------------------------------------
1 | import find_mxnet
2 | import mxnet as mx
3 | from img_lib import ImgLibrary
4 |
5 |
def dist(a, b):
    """Squared Euclidean distance between the L2-normalised copies of a and b."""
    norm_a = mx.nd.L2Normalization(mx.nd.expand_dims(a, axis=0))
    norm_b = mx.nd.L2Normalization(mx.nd.expand_dims(b, axis=0))
    delta = norm_a - norm_b
    return mx.nd.sum(delta * delta).asnumpy()[0]
9 |
10 |
class TimeInvScheduler(mx.lr_scheduler.LRScheduler):
    """Time-inverse learning-rate decay.

    lr = base_lr / (1 + num_update / step), clamped from below at
    stop_factor_lr.
    """
    def __init__(self, step, stop_factor_lr=1e-8):
        super(TimeInvScheduler, self).__init__()
        if step < 1:
            raise ValueError("Schedule step must be greater or equal than 1 round")
        self.step = step
        self.stop_factor_lr = stop_factor_lr

    def __call__(self, num_update):
        # Decay counter grows by one every `step` updates.
        decayed = self.base_lr * 1.0 / (1.0 + num_update / self.step)
        return max(decayed, self.stop_factor_lr)
25 |
26 |
def load_checkpoint(prefix, epoch):
    """Load '<prefix>-<epoch>.params' and split tensors into arg/aux dicts.

    Unlike the baseline variant, nothing is filtered out: the RL stage
    reuses all saved parameters.
    """
    # symbol = sym.load('%s-symbol.json' % prefix)
    saved = mx.nd.load('%s-%04d.params' % (prefix, epoch))
    arg_params, aux_params = {}, {}
    for key, value in saved.items():
        kind, name = key.split(':', 1)
        if kind == 'arg':
            arg_params[name] = value
        elif kind == 'aux':
            aux_params[name] = value
    return (arg_params, aux_params)
41 |
42 |
def copyto(x):
    # Return a copy of x on its own context (a detached duplicate array).
    return x.copyto(x.context)
45 |
46 |
def get_imRecordIter(args, name, input_shape, batch_size, kv=None, shuffle=False, aug=False, even_iter=False):
    '''
    Get an iterator using ImgLibrary or mx.io.ImageRecordIter.

    args must expose `data_dir`; `name` is the basename of the .lst/.rec
    pair inside it. input_shape is the (C, H, W) data shape; `aug`
    enables random crop/mirror (color jitter is disabled below);
    `even_iter` selects the custom ImgLibrary iterator. `kv` is
    currently unused (the kvstore sharding args are commented out).
    '''
    if even_iter:
        # Python-side iterator: resize to 128, subtract mean of 128.
        aug_params = {}
        aug_params['resize'] = 128
        aug_params['rand_crop'] = aug
        aug_params['rand_mirror'] = aug
        aug_params['input_shape'] = input_shape
        aug_params['mean'] = 128.0

        dataiter = ImgLibrary(
            '%s/%s.lst' % (args.data_dir, name),
            batch_size=batch_size,
            aug_params=aug_params,
            shuffle=shuffle,
            data_dir = args.data_dir)
    else:
        if aug:
            # Training iterator: crop/mirror only; the photometric jitter
            # used in the baseline stage is intentionally commented out.
            dataiter = mx.io.ImageRecordIter(
                path_imglist="%s/%s.lst" % (args.data_dir, name),
                path_imgrec="%s/%s.rec" % (args.data_dir, name),
                # mean_img="models/market_mean.bin",
                mean_r=128.0,
                mean_g=128.0,
                mean_b=128.0,
                rand_crop=True,
                rand_mirror=True,
                #max_random_contrast=0.1,
                #max_random_illumination=0.1,
                #max_aspect_ratio=0.1,
                #max_shear_ratio=0.2,
                #random_h=10,
                #random_s=10,
                #random_l=10,
                #max_random_contrast=0.2,
                #max_random_illumination=0.2,
                #max_aspect_ratio=0.2,
                #max_shear_ratio=0.2,
                #random_h=30,
                #random_s=30,
                #random_l=30,
                prefetch_buffer=4,
                preprocess_threads=4,
                shuffle=shuffle,
                label_width=1,
                round_batch=True,
                data_shape=input_shape,
                batch_size=batch_size,)
                #num_parts=kv.num_workers,
                #part_index=kv.rank)
        else:
            # Evaluation iterator: mean subtraction only, no augmentation.
            dataiter = mx.io.ImageRecordIter(
                path_imglist="%s/%s.lst" % (args.data_dir, name),
                path_imgrec="%s/%s.rec" % (args.data_dir, name),
                # mean_img="models/market_mean.bin",
                mean_r=128.0,
                mean_g=128.0,
                mean_b=128.0,
                prefetch_buffer=4,
                preprocess_threads=4,
                shuffle=shuffle,
                label_width=1,
                round_batch=True,
                data_shape=input_shape,
                batch_size=batch_size,)
                #num_parts=kv.num_workers,
                #part_index=kv.rank)

    return dataiter
118 |
--------------------------------------------------------------------------------
/RL/tb_system.py:
--------------------------------------------------------------------------------
class TensorBoardSystem:
    """Accumulate scalar training metrics and flush them to TensorBoard.

    Each named "board" keeps a [sum, count] pair in `tb_pool`; `print_board`
    writes every board's running mean (plus precision/recall/F1/etc. derived
    from the tp/fp/tn/fn counters) via `writer.add_scalar`, using `heartbeat`
    as the x-axis step. Note: sums and counters are never reset, so all
    reported values are cumulative over the object's lifetime.
    """
    def __init__(self, pre, writer):
        # name -> [value_sum, sample_count]
        self.tb_pool = {}
        # prefix prepended to every metric name
        self.pre = pre
        self.init_board()
        # step counter used as the TensorBoard x-axis; bumped per print_board
        self.heartbeat = 0
        self.writer = writer
        # confusion-matrix counters for terminal actions 0/1 vs. ground truth
        self.tp = 0
        self.fp = 0
        self.tn = 0
        self.fn = 0

    def init_board(self):
        """Create one accumulator per tracked metric, prefixed with self.pre."""
        #pool_names = ['softmax_acc', 'triplet_loss', 'triplet_neg', 'triplet_pos', 'triplet_diff', 'triplet_ratio']
        # NOTE(review): 'Qdiff_2'/'Qdiff_3' boards are created but never
        # updated by put_board, so they are filtered out at print time.
        Q_pool_names = ['neg_neg', 'pos_round', 'neg_ratio', 'neg_round', 'pos_pos', 'pos_ratio', 'pos_acc', 'neg_acc', 'Qvalue_0', 'Qvalue_1', 'Qvalue_2', 'Qvalue_3', 'Qgt_0', 'Qgt_1', 'Qdiff_2', 'Qdiff_3']#, 'epsilon']
        #for i in pool_names:
        #    add_board(tb_pool, i)
        '''for i in xrange(seq_len):
            pre = 'Q' + str(i) + '_'
            for j in Q_pool_names:
                add_board(tb_pool, pre + j)'''
        for j in Q_pool_names:
            self.add_board(self.pre + '_' + j)

    def add_board(self, name):
        # [sum, count] accumulator
        self.tb_pool[name] = [0, 0]

    def update_board(self, name, v):
        # fold one observation into the named accumulator
        self.tb_pool[name][0] += v
        self.tb_pool[name][1] += 1.0

    def get_board(self, name):
        """Return (mean, heartbeat) for a board, or (0, 0) if it is empty."""
        if self.tb_pool[name][1] > 0.5:
            return (self.tb_pool[name][0] / self.tb_pool[name][1], self.heartbeat)
        else:
            return (0, 0)

    def put_board(self, Qvalue, action, t, delta, epsilon, rounds, dummy=False):
        """Record one decision step.

        Qvalue : per-action Q-value array; action : chosen action index;
        t : ground-truth label (0/1); rounds : number of steps taken before a
        terminal decision; dummy : if True, skip the per-round statistics.
        `delta` and `epsilon` are currently unused.
        Actions 0/1 appear to be terminal negative/positive decisions and
        2/3 non-terminal "unsure" declarations — TODO confirm against dqn.py.
        """
        act = action
        pre = self.pre
        self.update_board(('%s_Qvalue_%d' % (pre, act)), Qvalue[act])
        # Qvalue[1 if t else 0] == Qvalue[t] for t in {0, 1}
        self.update_board(('%s_Qgt_%d' % (pre, t)), Qvalue[1 if t else 0])
        if act == 2:
            # action 2: declared "negative-ish"; neg_neg tracks correctness
            if t == 1:
                self.update_board('%s_neg_neg' % (pre), 0.0)
            else:
                self.update_board('%s_neg_neg' % (pre), 1.0)
            self.update_board('%s_neg_ratio' % (pre), 1.0)
        elif act == 3:
            # action 3: declared "positive-ish"; pos_pos tracks correctness
            if t == 1:
                self.update_board('%s_pos_pos' % (pre), 1.0)
            else:
                self.update_board('%s_pos_pos' % (pre), 0.0)
            self.update_board('%s_pos_ratio' % (pre), 1.0)
        else:
            # terminal decision: action 1 = predict same, 0 = predict different
            if act == 1:
                if t == 1:
                    self.tp += 1.0
                else:
                    self.fp += 1.0
            else:
                if t == 1:
                    self.fn += 1.0
                else:
                    self.tn += 1.0
            self.update_board('%s_pos_ratio' % (pre), 0.0)
            self.update_board('%s_neg_ratio' % (pre), 0.0)
            self.update_board('%s_pos_pos' % (pre), t == act)
            if t == 1:
                self.update_board('%s_pos_acc' % (pre), act == 1)
                if not dummy:
                    self.update_board('%s_pos_round' % (pre), rounds)
            else:
                self.update_board('%s_neg_acc' % (pre), act == 0)
                if not dummy:
                    self.update_board('%s_neg_round' % (pre), rounds)
        #self.update_board('Q_epsilon', epsilon)

    def print_board(self):
        """Flush every non-empty board and the derived precision/recall/gm/
        acc/F1 metrics to the writer, then advance the heartbeat."""
        for i in self.tb_pool:
            v = self.get_board(i)
            if v[1] > 0:
                self.writer.add_scalar(i, v[0], v[1])
        # only report classification metrics once both denominators are valid
        if (self.tp + self.fp > 0) and (self.tp + self.fn > 0):
            precision = 1.0 * self.tp / (self.tp + self.fp)
            recall = 1.0 * self.tp / (self.tp + self.fn)
            gm = (precision * recall) ** 0.5
            acc = 1.0 * (self.tp + self.tn) / (self.tp + self.tn + self.fp + self.fn)
            self.writer.add_scalar(self.pre + '_' + 'precision', precision, self.heartbeat)
            self.writer.add_scalar(self.pre + '_' + 'recall', recall, self.heartbeat)
            self.writer.add_scalar(self.pre + '_' + 'gm', gm, self.heartbeat)
            self.writer.add_scalar(self.pre + '_' + 'acc', acc, self.heartbeat)
            if precision + recall > 0:
                f1 = 2.0 * (precision * recall) / (precision + recall)
                self.writer.add_scalar(self.pre + '_' + 'f1', f1, self.heartbeat)
        self.heartbeat += 1
103 |
--------------------------------------------------------------------------------
/baseline/extract.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.insert(0, "mxnet/python/")
3 | import find_mxnet
4 | import mxnet as mx
5 | import logging
6 | import numpy as np
7 | import argparse
8 | import time
9 | import random
10 | import loss_layers
11 | import lsoftmax
12 | import loss_drop_layers
13 | import pairwiseDropout
14 | import scipy.io as sio
15 | import h5py
16 |
17 | # extract features for testing set
18 |
def get_imRecordIter(name, input_shape, batch_size, kv, shuffle=False, aug=False):
    """Create an ImageRecordIter over <args.data_dir>/<name>.{lst,rec}.

    NOTE: reads the module-level ``args`` namespace for the data directory.
    """
    options = dict(
        path_imglist="%s/%s.lst" % (args.data_dir, name),
        path_imgrec="%s/%s.rec" % (args.data_dir, name),
        #mean_img="models/market_mean.bin",
        mean_r=128.0,
        mean_g=128.0,
        mean_b=128.0,
        rand_crop=aug,
        rand_mirror=aug,
        prefetch_buffer=4,
        preprocess_threads=3,
        shuffle=shuffle,
        label_width=1,
        data_shape=input_shape,
        batch_size=batch_size,
        num_parts=kv.num_workers,
        part_index=kv.rank,
    )
    return mx.io.ImageRecordIter(**options)
39 |
40 |
41 | def extract_feature(model, iterator, sav_name, num, batch_size):
42 | feature = np.zeros((num, args.feature_size))
43 | now = 0
44 | iterator.reset()
45 | for batch in iterator:
46 | data = batch.data[0]
47 | output = model.predict(data)
48 | real_size = batch_size - batch.pad
49 | output = output[:real_size]
50 |
51 | feature[now:now+real_size] = output
52 | now += real_size
53 |
54 | print feature.shape, now
55 | h5f = h5py.File(sav_name, 'w')
56 | h5f.create_dataset('feat', data=feature)
57 | h5f.close()
58 | #data = {'feat': feature}
59 | #sio.savemat(sav_name, data, do_compression=True)
60 | #np.savetxt(sav_name[:-4]+'.csv', feature)
61 | # with open(sav_name, "w") as f:
62 | # cPickle.dump(feature, f, protocol=cPickle.HIGHEST_PROTOCOL)
63 |
64 |
def parse_args():
    """Command-line options for the feature-extraction script."""
    parser = argparse.ArgumentParser(
        description='single domain car recog training')
    add = parser.add_argument
    add('--gpus', type=str, default='6',
        help='the gpus will be used, e.g "0,1,2,3"')
    add('--data-dir', type=str,
        default="/data3/matt/iLIDS-VID/recs",
        help='data directory')
    add('--batch-size', type=int, default=1024,
        help='the batch size')
    add('--feature-size', type=int, default=1024,
        help='the feature size')
    add('--mode', type=str, default='ilds_baseline_b4',
        help='model mode')
    add('--dataset', type=str, default='image_test',
        help='dataset (test/query)')
    add('--kv-store', type=str,
        default='device', help='the kvstore type')
    add('--model-load-epoch', type=int, default=1,
        help='load the model on an epoch using the model-load-prefix')
    add('--model-load-prefix', type=str, default="test",
        help='load model prefix')
    return parser.parse_args()
88 |
89 |
def load_checkpoint(prefix, epoch):
    """Load '<prefix>-<epoch:04d>.params' and split it into arg/aux dicts.

    Keys in the saved file look like 'arg:name' or 'aux:name'.
    """
    saved = mx.nd.load('%s-%04d.params' % (prefix, epoch))
    arg_params, aux_params = {}, {}
    for key, value in saved.items():
        kind, name = key.split(':', 1)
        if kind == 'arg':
            arg_params[name] = value
        elif kind == 'aux':
            aux_params[name] = value
    return (arg_params, aux_params)
102 |
103 |
# ---- script entry: load checkpoint, build iterator, extract features ----
args = parse_args()

print args
batch_size = args.batch_size
# one context per requested GPU id
devices = [mx.gpu(int(i)) for i in args.gpus.split(',')]

# load the trained baseline network
symbol, arg_params, aux_params = mx.model.load_checkpoint(
    'models/%s' % args.mode, args.model_load_epoch)

# truncate the network at the flatten layer and L2-normalize the embedding
internals = symbol.get_internals()
symbol = internals["flatten_output"]
l2 = mx.symbol.L2Normalization(data=symbol, name='l2_norm')
kv = mx.kvstore.create(args.kv_store)
dataiter = get_imRecordIter(
    '%s' % args.dataset, (3, 224, 112), batch_size,
    kv, shuffle=False, aug=False)

model = mx.model.FeedForward(
    symbol=l2, ctx=devices, arg_params=arg_params,
    aux_params=aux_params, allow_extra_params=True)

# total image count = number of lines in the dataset .lst file
num = len(file('%s/%s.lst' % (args.data_dir, args.dataset)).read().splitlines())
extract_feature(model, dataiter, 'features/%s-%s.mat' % (args.dataset, args.mode), num, batch_size)
print ('done')
128 |
--------------------------------------------------------------------------------
/RL/base_module.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import warnings
3 | import find_mxnet
4 | import mxnet as mx
5 | import numpy as np
6 | from mxnet.module import Module
7 | from mxnet import context as ctx
8 | from mxnet.initializer import Uniform
9 | from mxnet import ndarray as nd
10 |
COUNT_MAX = 1  # number of gradient accumulations before an optimizer update
USE_AVERAGE = False  # if True, average accumulated gradients before updating
13 |
class BaseModule(Module):
    """mx.mod.Module extension with manual gradient bookkeeping.

    Adds explicit gradient clearing/averaging, accumulation of gradients
    from sibling modules, parameter copying between modules, and both
    element-wise and global-norm gradient clipping.
    """
    def __init__(self, symbol, data_names=('data',), label_names=('softmax_label',),
                 logger=logging, context=ctx.cpu(), work_load_list=None,
                 fixed_param_names=None, state_names=None):
        # count how many times gradients have been added from other modules
        self.add_counter = 0
        self.count_max = COUNT_MAX
        # Bug fix: `work_load_list` and `state_names` were accepted but
        # silently dropped; forward them to the parent Module.
        super(BaseModule, self).__init__(symbol=symbol, data_names=data_names,
                                         label_names=label_names, logger=logger,
                                         context=context,
                                         work_load_list=work_load_list,
                                         fixed_param_names=fixed_param_names,
                                         state_names=state_names)

    def clear_gradients(self):
        """Zero every gradient array in-place and reset the add counter."""
        self.add_counter = 0
        for grads in self._exec_group.grad_arrays:
            for grad in grads:
                grad -= grad

    def aver_gradients(self, n):
        """Divide all gradients by `n` (in-place average).

        Parameters
        ----------
        n : int
            Number of accumulated gradient contributions.
        """
        for grads in self._exec_group.grad_arrays:
            for grad in grads:
                grad /= float(n)

    def add_gradients_from_module(self, from_module):
        """Accumulate `from_module`'s gradients into this module.

        After `count_max` accumulations, optionally averages the gradients
        (USE_AVERAGE), applies the optimizer update, and clears them.
        """
        self.add_counter += 1
        # snapshot the source gradients so later mutation cannot alias them
        incoming = [[grad.copyto(grad.context) for grad in grads]
                    for grads in from_module._exec_group.grad_arrays]
        for gradsto, gradsfrom in zip(self._exec_group.grad_arrays, incoming):
            for gradto, gradfrom in zip(gradsto, gradsfrom):
                gradto += gradfrom

        if self.add_counter == self.count_max:
            if USE_AVERAGE:
                self.aver_gradients(self.add_counter)
            self.update()
            self.clear_gradients()
            self.add_counter = 0

    def copy_from_module(self, from_module):
        """Copy both arg and aux params from another module."""
        arg_params, aux_params = from_module.get_params()
        self.init_params(initializer=None, arg_params=arg_params,
                         aux_params=aux_params, force_init=True)

    def copy_param_from_module(self, from_module):
        """Copy only arg params from another module; keep own aux params."""
        arg_params, _ = from_module.get_params()
        _, aux_params = self.get_params()
        self.init_params(initializer=None, arg_params=arg_params,
                         aux_params=aux_params, force_init=True)

    def clip_gradients(self, threshold):
        """Element-wise clip every gradient to [-threshold, threshold].

        The original `grad -= grad - clip(grad)` trick is algebraically
        identical to the direct in-place assignment used here.
        """
        for grads in self._exec_group.grad_arrays:
            for grad in grads:
                grad[:] = mx.nd.clip(grad, -1.0 * threshold, 1.0 * threshold)

    def norm_clipping(self, threshold=1.0):
        """Clip the norm according to the threshold.
        All the gradients are concatenated to a single vector and the overall norm is calculated.
        Follows `[ICML2013] On the difficulty of training recurrent neural networks`
        Parameters
        ----------
        threshold : float, optional
        Returns
        -------
        norm_val : float
            The norm value. It could be used to measure whether the gradients are stable.
        """
        assert self.binded and self.params_initialized
        norm_val = self.get_global_norm_val()
        if norm_val > threshold:
            ratio = threshold / float(norm_val)
            for grads in self._exec_group.grad_arrays:
                for grad in grads:
                    grad[:] *= ratio
        return norm_val

    def get_global_norm_val(self):
        """Get the overall gradient norm ||W||_2
        Parameters
        ----------
        net : mx.mod.Module
        Returns
        -------
        norm_val : float
        """
        assert self.binded and self.params_initialized
        #TODO The code in the following will cause the estimated norm to be different for multiple gpus
        norm_val = 0.0
        for i in range(len(self._exec_group.grad_arrays[0])):
            norm_val += np.sqrt(
                sum([nd.norm(grads[i]).asnumpy()[0] ** 2
                     for grads in self._exec_group.grad_arrays]))
        norm_val /= float(len(self._exec_group.grad_arrays[0]))
        return norm_val
119 |
--------------------------------------------------------------------------------
/baseline/preprocess_ilds_image.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 | import csv
4 | import random
5 | import glob
6 | import numpy as np
7 |
ROOT = '/data3/matt/iLIDS-VID'  # dataset root on disk
output = '/data3/matt/iLIDS-VID/recs'  # where .lst/.rec/.txt outputs are written
im2rec = '/home/zhangjianfu/reid/mxnet/bin/im2rec'  # mxnet record-packer binary

sets = 0  # current split index (0..9); bumped by the __main__ loop
13 |
14 | def load_split():
15 | train, test, pool = [], [], []
16 | images, cnt = glob.glob('%s/i-LIDS-VID/images/cam1/person*/*.png' % (ROOT)), 0
17 | for i in images:
18 | t = int(i.split('/')[-2][-3:])
19 | cnt += 1
20 | pool.append(t)
21 | train = random.sample(pool, 150)
22 | for i in pool:
23 | if i not in train:
24 | test.append(i)
25 | print train, test
26 | print len(train), len(test)
27 | return train, test
28 |
def rnd_pos(N, i):
    """Return a random index in [0, N-1] guaranteed to differ from i."""
    x = random.randint(0, N - 2)
    if x == i:
        return x + 1
    return x
32 |
def save_rec(lst, path, name):
    """Write `lst` rows to a tab-separated .lst file and pack it with im2rec."""
    lst_file = '%s/%s.lst' % (path, name)
    rec_file = '%s/%s.rec' % (path, name)
    #print lst_file, rec_file, '%s %s %s %s resize=128 quality=90' % (im2rec, lst_file, ROOT, rec_file)
    with open(lst_file, "w") as f:
        writer = csv.writer(f, delimiter='\t', lineterminator='\n')
        for row in lst:
            writer.writerow(row)
    os.system('%s %s %s %s resize=128 quality=90' % (im2rec, lst_file, ROOT, rec_file))
43 |
def save_train(f, is_valid=False):
    """Sample cross-camera positive/negative pairs and pack two rec files.

    f is [cam0_tracks, cam1_tracks], one image-path list per person.
    Each round samples 96 distinct person indices: 32 make positive pairs,
    64 make negative pairs; lst ids (cnt, cnt+1) pair the entries up.
    Outputs image_{train|valid}_even<sets>.rec (positives) and
    image_{train|valid}_rand<sets>.rec (negatives).
    """
    plst, nlst, cnt, N, pool = [], [], 0, len(f[0]), [_ for _ in xrange(len(f[0]))]
    for _ in xrange(10000 if not is_valid else 200):
        ts = random.sample(pool, 96)
        ns, ps = ts[:64], ts[64:]
        for r in xrange(32):
            i, x, y = ps[r], ns[r + r], ns[r + r + 1]
            # positive: the same person i seen from cam0 and cam1
            p1 = (cnt, i, f[0][i][random.randint(0, len(f[0][i]) - 1)])
            p2 = (cnt + 1, i, f[1][i][random.randint(0, len(f[1][i]) - 1)])
            # negative: two different persons x and y, one per camera
            n1 = (cnt, x, f[1][x][random.randint(0, len(f[1][x]) - 1)])
            n2 = (cnt + 1, y, f[0][y][random.randint(0, len(f[0][y]) - 1)])
            cnt += 2
            plst.append(p1)
            plst.append(p2)
            nlst.append(n1)
            nlst.append(n2)
    save_rec(plst, output, 'image_' + ('valid' if is_valid else 'train') + '_even'+ str(sets))
    save_rec(nlst, output, 'image_' + ('valid' if is_valid else 'train') + '_rand'+ str(sets))
62 |
def save_test(f):
    """Dump cam0 then cam1 test tracklets into image_test<sets>.rec.

    Each tracklet gets a unique sequential id; the cumulative tracklet
    offsets are saved alongside as image_test<sets>.txt.
    """
    lst = []
    cnt_lst = []
    cnt = 0
    for cam in (0, 1):
        for track in f[cam]:
            cnt_lst.append(cnt)
            for img in track:
                lst.append((cnt, cam, img))
            cnt += 1
    cnt_lst.append(cnt)
    np.savetxt(output + '/image_test' + str(sets) + '.txt', np.array(cnt_lst), fmt='%d')
    save_rec(lst, output, 'image_test'+ str(sets))
81 |
def save_valid(f):
    """Dump cam0 then cam1 tracklets into image_valid<sets>.rec.

    Identical layout to save_test; offsets go to image_valid<sets>.txt.
    """
    lst = []
    cnt_lst = []
    cnt = 0
    for cam in (0, 1):
        for track in f[cam]:
            cnt_lst.append(cnt)
            for img in track:
                lst.append((cnt, cam, img))
            cnt += 1
    cnt_lst.append(cnt)
    np.savetxt(output + '/image_valid' + str(sets) + '.txt', np.array(cnt_lst), fmt='%d')
    save_rec(lst, output, 'image_valid'+ str(sets))
100 |
101 |
def gen(train_lst, test_lst, ifshuffle):
    """Collect per-camera image-path lists and save all rec files.

    train and valid are both drawn from train_lst (valid reads the single
    'images' folder, train the full 'sequences'); test comes from test_lst.
    """
    if ifshuffle:
        random.shuffle(train_lst)
        random.shuffle(test_lst)
    train, valid, test = [[], []], [[], []], [[], []]
    for i in xrange(3):
        # i == 0 -> train, 1 -> valid, 2 -> test
        lst = train_lst if i <= 1 else test_lst
        pool = train if i == 0 else (valid if i == 1 else test)
        for k in lst:
            for j in xrange(2):
                # NOTE(review): this local `sets` shadows the module-level
                # split index used by the save_* functions — confirm intended.
                sets = 'images' if i == 1 else 'sequences'
                images = glob.glob('%s/i-LIDS-VID/%s/cam%d/person%03d/*.png' % (ROOT, sets, j + 1, k))
                #print k, j, images
                assert len(images) >= 1
                g = [_[len(ROOT):] for _ in images]
                pool[j].append(g) # fix prefix

    save_train(train)
    save_train(valid, is_valid=True)
    save_test(test)
    save_valid(train)
123 |
if __name__ == '__main__':
    # Generate 10 independent random train/test splits (sets 0..9).
    for i in xrange(10):
        print 'sets', sets
        train, test = load_split()
        gen(train, test, ifshuffle=True)
        sets += 1
130 |
131 |
--------------------------------------------------------------------------------
/baseline/preprocess_prid_image.py:
--------------------------------------------------------------------------------
1 | import os
2 | import subprocess
3 | import csv
4 | import random
5 | import glob
6 | import numpy as np
7 |
ROOT = '/data3/matt/prid_2011'  # dataset root on disk
output = '/data3/matt/prid_2011/recs'  # where .lst/.rec/.txt outputs are written
im2rec = '/home/tina/reid/mxnet/bin/im2rec'  # mxnet record-packer binary
sets = 0  # current split index (0..9); bumped by the __main__ loop
12 |
13 | def load_split():
14 | train, test, pool = [], [], []
15 | cnt = 0
16 | for i in xrange(386):
17 | cam_a = glob.glob('%s/multi_shot/cam_a/person_%04d/*.png' % (ROOT, i))
18 | cam_b = glob.glob('%s/multi_shot/cam_b/person_%04d/*.png' % (ROOT, i))
19 | if len(cam_a) * len(cam_b) > 0:
20 | cnt += 1
21 | pool.append(i)
22 | if cnt >= 200:
23 | break
24 | train = random.sample(pool, 100)
25 | for i in pool:
26 | if i not in train:
27 | test.append(i)
28 | print train, test
29 | print len(train), len(test)
30 | return train, test
31 |
def rnd_pos(N, i):
    """Return a random index in [0, N-1] guaranteed to differ from i."""
    x = random.randrange(0, N - 1)
    if x == i:
        return x + 1
    return x
35 |
def save_rec(lst, path, name):
    """Write `lst` rows to a tab-separated .lst file and pack it with im2rec."""
    lst_file = '%s/%s.lst' % (path, name)
    rec_file = '%s/%s.rec' % (path, name)
    #print lst_file, rec_file, '%s %s %s %s resize=128 quality=90' % (im2rec, lst_file, ROOT, rec_file)
    with open(lst_file, "w") as f:
        writer = csv.writer(f, delimiter='\t', lineterminator='\n')
        for row in lst:
            writer.writerow(row)
    os.system('%s %s %s %s resize=128 quality=90' % (im2rec, lst_file, ROOT, rec_file))
46 |
def save_train(f, is_valid=False):
    """Sample cross-camera positive/negative pairs and pack two rec files.

    f is [cam_a_tracks, cam_b_tracks], one image-path list per person.
    Each round samples 96 distinct person indices: 32 make positive pairs,
    64 make negative pairs; lst ids (cnt, cnt+1) pair the entries up.
    Outputs image_{train|valid}_even<sets>.rec (positives) and
    image_{train|valid}_rand<sets>.rec (negatives).
    """
    plst, nlst, cnt, N, pool = [], [], 0, len(f[0]), [_ for _ in xrange(len(f[0]))]
    for _ in xrange(10000 if not is_valid else 200):
        ts = random.sample(pool, 96)
        ns, ps = ts[:64], ts[64:]
        for r in xrange(32):
            i, x, y = ps[r], ns[r + r], ns[r + r + 1]
            # positive: the same person i seen from cam_a and cam_b
            p1 = (cnt, i, f[0][i][random.randrange(len(f[0][i]))])
            p2 = (cnt + 1, i, f[1][i][random.randrange(len(f[1][i]))])
            # negative: two different persons x and y, one per camera
            n1 = (cnt, x, f[1][x][random.randrange(len(f[1][x]))])
            n2 = (cnt + 1, y, f[0][y][random.randrange(len(f[0][y]))])
            cnt += 2
            plst.append(p1)
            plst.append(p2)
            nlst.append(n1)
            nlst.append(n2)
    save_rec(plst, output, 'image_' + ('valid' if is_valid else 'train') + '_even'+ str(sets))
    save_rec(nlst, output, 'image_' + ('valid' if is_valid else 'train') + '_rand'+ str(sets))
65 |
def save_test(f):
    """Dump cam_a then cam_b test tracklets into image_test<sets>.rec.

    Each tracklet gets a unique sequential id; the cumulative tracklet
    offsets are saved alongside as image_test<sets>.txt.
    """
    lst = []
    cnt_lst = []
    cnt = 0
    for cam in (0, 1):
        for track in f[cam]:
            cnt_lst.append(cnt)
            for img in track:
                lst.append((cnt, cam, img))
            cnt += 1
    cnt_lst.append(cnt)
    np.savetxt(output + '/image_test' + str(sets) + '.txt', np.array(cnt_lst), fmt='%d')
    save_rec(lst, output, 'image_test'+ str(sets))
84 |
def save_valid(f):
    """Dump cam_a then cam_b tracklets into image_valid<sets>.rec.

    Identical layout to save_test; offsets go to image_valid<sets>.txt.
    """
    lst = []
    cnt_lst = []
    cnt = 0
    for cam in (0, 1):
        for track in f[cam]:
            cnt_lst.append(cnt)
            for img in track:
                lst.append((cnt, cam, img))
            cnt += 1
    cnt_lst.append(cnt)
    np.savetxt(output + '/image_valid' + str(sets) + '.txt', np.array(cnt_lst), fmt='%d')
    save_rec(lst, output, 'image_valid'+ str(sets))
103 |
def gen(train_lst, test_lst, ifshuffle):
    """Collect per-camera image-path lists and save all rec files.

    train and valid are both drawn from train_lst; test comes from test_lst.
    Unlike the iLIDS variant, every stage reads the 'multi_shot' folder.
    """
    if ifshuffle:
        random.shuffle(train_lst)
        random.shuffle(test_lst)
    train, valid, test = [[], []], [[], []], [[], []]
    for i in xrange(3):
        # i == 0 -> train, 1 -> valid, 2 -> test
        lst = train_lst if i <= 1 else test_lst
        pool = train if i == 0 else (valid if i == 1 else test)
        for k in lst:
            for j in xrange(2):
                # NOTE(review): this local `sets` shadows the module-level
                # split index used by the save_* functions — confirm intended.
                sets = 'multi_shot'
                images = glob.glob('%s/%s/cam_%s/person_%04d/*.png' % (ROOT, sets, 'a' if j == 0 else 'b', k))
                #print k, j, images
                assert len(images) >= 1
                g = [_[len(ROOT):] for _ in images]
                pool[j].append(g) # fix prefix

    save_train(train)
    save_train(valid, is_valid=True)
    save_test(test)
    save_valid(train)
125 |
if __name__ == '__main__':
    # Generate 10 independent random train/test splits (sets 0..9).
    for i in xrange(10):
        print 'sets', sets
        train, test = load_split()
        gen(train, test, ifshuffle=True)
        sets += 1
132 |
--------------------------------------------------------------------------------
/RL/agent.py:
--------------------------------------------------------------------------------
1 | import sys
2 | #sys.path.insert(0, "mxnet/python/")
3 | import find_mxnet
4 | import mxnet as mx
5 | from mxnet.optimizer import SGD, Adam, RMSProp
6 |
7 | import numpy as np
8 |
9 | from symbols import sym_base_net, sym_DQN
10 | from utils import load_checkpoint, TimeInvScheduler, dist, copyto
11 | from base_module import BaseModule
12 | import os
13 |
def create_moduleQ(data1, data2, ctx, seq_len, num_sim, num_hidden, num_acts, min_states, min_imgs, fusion=False, bn=False, is_train=False, nh=False, is_e2e=False):
    """Build and bind the Q-network module on top of the two feature streams.

    data1/data2 are the base-net output symbols for the two image sequences;
    when not end-to-end they are frozen via fixed_param_names.
    NOTE(review): the `nh` argument is currently unused — sym_DQN is called
    with a hard-coded no_his=False; confirm whether it should be no_his=nh.
    """
    # raise mxnet's in-place gradient-sum cap so accumulation stays in-place
    os.environ['MXNET_EXEC_INPLACE_GRAD_SUM_CAP'] = str(100)
    net = sym_DQN(data1, data2, num_sim, num_hidden, is_train=is_train, num_acts=num_acts, min_states=min_states, min_imgs=min_imgs, fusion=fusion, bn=bn, global_stats=False, no_his=False)
    mod = BaseModule(symbol=net, data_names=('data1', 'data2'), label_names=None,
                     fixed_param_names=[] if is_e2e else ['data1', 'data2'], context=ctx)
    # both sequence inputs are seq_len stacks of 3x224x112 images
    mod.bind(data_shapes=[('data1', (seq_len, 3, 224, 112)),
                          ('data2', (seq_len, 3, 224, 112))],
             for_training=is_train, inputs_need_grad=False)
    return mod
23 |
24 |
def get_optimizer(args):
    """Build the optimizer selected by args.optimizer ('sgd'|'adam'|'rms')."""
    assert args.optimizer in ['sgd', 'adam', 'rms']

    if args.optimizer == 'rms':
        return RMSProp(learning_rate=args.lr, wd=0.0001)
    if args.optimizer == 'sgd':
        # step decay: multiply lr by 0.1 at each epoch listed in args.lr_step
        stepPerEpoch = args.num_examples
        steps = [stepPerEpoch * int(x) for x in args.lr_step.split(',')]
        lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(step=steps, factor=0.1)
        #lr_scheduler = TimeInvScheduler(step=args.tisr) # Time inverse scheduler
        return SGD(learning_rate=args.lr, momentum=0.9,
                   wd=0.0001, clip_gradient=10, lr_scheduler=lr_scheduler,
                   rescale_grad=1.0)
    return Adam(learning_rate=args.lr, wd=0.0001, clip_gradient=10)
39 |
40 |
def get_Qvalue(Q, data, is_train=False):
    """Forward (data1, data2) through module Q; return Q-values as numpy."""
    batch = mx.io.DataBatch([data[0], data[1]], [])
    Q.forward(batch, is_train=is_train)
    return Q.get_outputs()[0].asnumpy()
45 |
46 |
def wash(data, ctx):
    """Convert sample data into mx.nd arrays on `ctx`.

    If `data` is a list of samples (list-of-lists), the i-th field of every
    sample is stacked along a fresh batch axis first; otherwise each element
    is converted directly.
    """
    ret = []
    if isinstance(data[0], list):
        for i in xrange(len(data[0])):
            parts = []
            for sample in data:
                item = sample[i]
                # add a batch axis unless the field is already a 1-sample batch
                if item.shape[0] > 1 or len(item.shape) == 1:
                    item = np.expand_dims(item, axis=0)
                parts.append(item)
            ret.append(mx.nd.array(np.concatenate(parts), ctx=ctx))
    else:
        for item in data:
            ret.append(mx.nd.array(item, ctx=ctx))
    return ret
60 |
61 |
class Agent:
    """DQN agent: owns the Q module, its optimizer, and performs manual
    gradient accumulation over `batch_size` backward passes per update."""
    def __init__(self, args, devices):
        # end-to-end flag: if True the base CNN is trained as well
        self.e2e = args.e2e
        self.his = args.history
        # warm-start from the pretrained baseline re-id checkpoint
        arg_params, aux_params = load_checkpoint('../baseline/models/%s' % args.model_load_prefix, args.model_load_epoch)
        data1, data2 = sym_base_net(args.network, is_train=args.e2e, global_stats=True)
        init = mx.initializer.Xavier(rnd_type='gaussian', factor_type='in', magnitude=2)
        opt = get_optimizer(args)
        self.Q = create_moduleQ(data1, data2, devices, args.sample_size, args.num_sim, args.num_hidden, args.num_acts, args.min_states, args.min_imgs, fusion=args.fusion, is_train=True, nh=not args.history, is_e2e=args.e2e, bn=args.q_bn)
        # allow_missing: Q-head params are fresh and Xavier-initialized
        self.Q.init_params(initializer=init,
                arg_params=arg_params,
                aux_params=aux_params,
                allow_missing=True,
                force_init=True)
        self.Q.init_optimizer(optimizer=opt)
        self.target_cnt = 1
        self.devices = devices
        self.prefix = 'models/%s' % args.mode
        self.batch_size = args.batch_size
        self.update_cnt = 0
        self.Q.clear_gradients()
        # persistent accumulator buffers, same shapes/contexts as Q's grads
        self.gradQ = [[grad.copyto(grad.context) for grad in grads] for grads in self.Q._exec_group.grad_arrays]

    def wash_data(self, data):
        """Convert raw sample data to mx.nd arrays on this agent's devices."""
        return wash(data, self.devices)

    def get_Qvalue(self, data, is_train=False):
        """Forward `data` through the Q network; return Q-values as numpy."""
        return get_Qvalue(self.Q, data, is_train=is_train)

    def update(self, grad):
        """Backprop `grad`, accumulate into gradQ; every `batch_size` calls
        apply the averaged gradients and reset the accumulator."""
        self.Q.backward(grad)
        for gradsr, gradsf in zip(self.Q._exec_group.grad_arrays, self.gradQ):
            for gradr, gradf in zip(gradsr, gradsf):
                gradf += gradr
        self.Q.clear_gradients()
        self.update_cnt += 1
        if self.update_cnt % self.batch_size == 0:
            print 'update', self.update_cnt
            # copy the averaged accumulation back into the executor grads
            for gradsr, gradsf in zip(self.Q._exec_group.grad_arrays, self.gradQ):
                for gradr, gradf in zip(gradsr, gradsf):
                    gradr[:] = gradf[:] / self.batch_size
            self.Q.update()
            for grads in self.gradQ:
                for grad in grads:
                    grad[:] = 0

    def save(self, e):
        """Save the Q-network parameters for epoch `e`."""
        self.Q.save_params('%s-%04d.params'%(self.prefix, e))
110 |
--------------------------------------------------------------------------------
/baseline/symbol_inception-bn.py:
--------------------------------------------------------------------------------
1 | """
2 |
3 | Inception + BN, suitable for images with around 224 x 224
4 |
5 | Reference:
6 |
7 | Sergey Ioffe and Christian Szegedy. Batch normalization: Accelerating deep
8 | network training by reducing internal covariate shift. arXiv preprint
9 | arXiv:1502.03167, 2015.
10 |
11 | """
12 |
13 | import find_mxnet
14 | import mxnet as mx
15 |
eps = 1e-10 + 1e-5  # batch-norm epsilon: 1e-5 plus a tiny safety margin
bn_mom = 0.9  # batch-norm momentum for running statistics
fix_gamma = False  # learn gamma in every BatchNorm layer
19 |
20 |
def ConvFactory(data, num_filter, kernel, stride=(1,1), pad=(0, 0), name=None, suffix=''):
    """Convolution -> BatchNorm -> ReLU building block."""
    tag = '%s%s' % (name, suffix)
    conv = mx.symbol.Convolution(data=data, num_filter=num_filter, kernel=kernel,
                                 stride=stride, pad=pad, name='conv_' + tag)
    bn = mx.symbol.BatchNorm(data=conv, fix_gamma=fix_gamma, eps=eps,
                             momentum=bn_mom, name='bn_' + tag)
    return mx.symbol.Activation(data=bn, act_type='relu', name='relu_' + tag)
26 |
def InceptionFactoryA(data, num_1x1, num_3x3red, num_3x3, num_d3x3red, num_d3x3, pool, proj, name):
    """Inception-A block: four parallel branches concatenated on channels.

    Branches: 1x1 conv; 1x1-reduce + 3x3 conv; 1x1-reduce + two 3x3 convs;
    3x3 pooling (`pool` type) + 1x1 projection to `proj` channels.
    """
    # 1x1
    c1x1 = ConvFactory(data=data, num_filter=num_1x1, kernel=(1, 1), name=('%s_1x1' % name))
    # 3x3 reduce + 3x3
    c3x3r = ConvFactory(data=data, num_filter=num_3x3red, kernel=(1, 1), name=('%s_3x3' % name), suffix='_reduce')
    c3x3 = ConvFactory(data=c3x3r, num_filter=num_3x3, kernel=(3, 3), pad=(1, 1), name=('%s_3x3' % name))
    # double 3x3 reduce + double 3x3
    cd3x3r = ConvFactory(data=data, num_filter=num_d3x3red, kernel=(1, 1), name=('%s_double_3x3' % name), suffix='_reduce')
    cd3x3 = ConvFactory(data=cd3x3r, num_filter=num_d3x3, kernel=(3, 3), pad=(1, 1), name=('%s_double_3x3_0' % name))
    cd3x3 = ConvFactory(data=cd3x3, num_filter=num_d3x3, kernel=(3, 3), pad=(1, 1), name=('%s_double_3x3_1' % name))
    # pool + proj
    pooling = mx.symbol.Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool, name=('%s_pool_%s_pool' % (pool, name)))
    cproj = ConvFactory(data=pooling, num_filter=proj, kernel=(1, 1), name=('%s_proj' % name))
    # concat
    concat = mx.symbol.Concat(*[c1x1, c3x3, cd3x3, cproj], name='ch_concat_%s_chconcat' % name)
    return concat
43 |
def InceptionFactoryB(data, num_3x3red, num_3x3, num_d3x3red, num_d3x3, name):
    """Inception-B (grid-reduction) block: halves spatial resolution.

    Branches: 1x1-reduce + strided 3x3 conv; 1x1-reduce + 3x3 + strided 3x3;
    strided 3x3 max-pool. Outputs are concatenated on channels.
    """
    # 3x3 reduce + 3x3
    c3x3r = ConvFactory(data=data, num_filter=num_3x3red, kernel=(1, 1), name=('%s_3x3' % name), suffix='_reduce')
    c3x3 = ConvFactory(data=c3x3r, num_filter=num_3x3, kernel=(3, 3), pad=(1, 1), stride=(2, 2), name=('%s_3x3' % name))
    # double 3x3 reduce + double 3x3
    cd3x3r = ConvFactory(data=data, num_filter=num_d3x3red, kernel=(1, 1), name=('%s_double_3x3' % name), suffix='_reduce')
    cd3x3 = ConvFactory(data=cd3x3r, num_filter=num_d3x3, kernel=(3, 3), pad=(1, 1), stride=(1, 1), name=('%s_double_3x3_0' % name))
    cd3x3 = ConvFactory(data=cd3x3, num_filter=num_d3x3, kernel=(3, 3), pad=(1, 1), stride=(2, 2), name=('%s_double_3x3_1' % name))
    # pool + proj
    pooling = mx.symbol.Pooling(data=data, kernel=(3, 3), stride=(2, 2), pad=(1, 1), pool_type="max", name=('max_pool_%s_pool' % name))
    # concat
    concat = mx.symbol.Concat(*[c3x3, cd3x3, pooling], name='ch_concat_%s_chconcat' % name)
    return concat
57 |
def get_symbol(num_classes=1000):
    """Inception-BN backbone up to the last inception block (in5b).

    NOTE: the classifier head (global pooling / fc / softmax) is commented
    out below, so `num_classes` is currently unused and the returned symbol
    is the in5b feature map.
    """
    # data
    data = mx.symbol.Variable(name="data")
    # stage 1
    conv1 = ConvFactory(data=data, num_filter=64, kernel=(7, 7), stride=(2, 2), pad=(3, 3), name='1')
    pool1 = mx.symbol.Pooling(data=conv1, kernel=(3, 3), stride=(2, 2), name='pool_1', pool_type='max')
    # stage 2
    conv2red = ConvFactory(data=pool1, num_filter=64, kernel=(1, 1), stride=(1, 1), name='2_red')
    conv2 = ConvFactory(data=conv2red, num_filter=192, kernel=(3, 3), stride=(1, 1), pad=(1, 1), name='2')
    pool2 = mx.symbol.Pooling(data=conv2, kernel=(3, 3), stride=(2, 2), name='pool_2', pool_type='max')
    # stage 2
    in3a = InceptionFactoryA(pool2, 64, 64, 64, 64, 96, "avg", 32, '3a')
    in3b = InceptionFactoryA(in3a, 64, 64, 96, 64, 96, "avg", 64, '3b')
    in3c = InceptionFactoryB(in3b, 128, 160, 64, 96, '3c')
    # stage 3
    in4a = InceptionFactoryA(in3c, 224, 64, 96, 96, 128, "avg", 128, '4a')
    in4b = InceptionFactoryA(in4a, 192, 96, 128, 96, 128, "avg", 128, '4b')
    in4c = InceptionFactoryA(in4b, 160, 128, 160, 128, 160, "avg", 128, '4c')
    in4d = InceptionFactoryA(in4c, 96, 128, 192, 160, 192, "avg", 128, '4d')
    in4e = InceptionFactoryB(in4d, 128, 192, 192, 256, '4e')
    # stage 4
    in5a = InceptionFactoryA(in4e, 352, 192, 320, 160, 224, "avg", 128, '5a')
    in5b = InceptionFactoryA(in5a, 352, 192, 320, 192, 224, "max", 128, '5b')
    # global avg pooling
    #avg = mx.symbol.Pooling(data=in5b, kernel=(7, 7), stride=(1, 1), name="global_pool", pool_type='avg')
    # linear classifier
    #flatten = mx.symbol.Flatten(data=avg, name='flatten')
    #fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=num_classes, name='fc1')
    #softmax = mx.symbol.SoftmaxOutput(data=fc1, name='softmax')
    return in5b
88 |
89 |
if __name__ == '__main__':
    # Print a layer-by-layer shape/parameter summary for a 3x128x64 input.
    sym = get_symbol()
    mx.viz.print_summary(sym, {'data': (1, 3, 128, 64)})
93 |
--------------------------------------------------------------------------------
/RL/segment_tree.py:
--------------------------------------------------------------------------------
1 | import operator
2 |
3 |
class SegmentTree(object):
    """Array-like container supporting O(log n) range reduction.

    https://en.wikipedia.org/wiki/Segment_tree

    The items live in the leaves of a complete binary tree stored in a
    flat list, where node ``i`` has children ``2*i`` and ``2*i + 1``.
    Compared with a plain array:

      a) writing an item costs O(log capacity) instead of O(1);
      b) ``reduce`` applies ``operation`` over any contiguous range of
         items in O(log capacity).

    Parameters
    ----------
    capacity: int
        Total size of the array - must be a power of two.
    operation: lambda obj, obj -> obj
        Associative operation used to combine elements (e.g. sum, max);
        together with ``neutral_element`` it must form a monoid over the
        possible element values.
    neutral_element: obj
        Identity element of ``operation``: float('-inf') for max,
        0 for sum.
    """

    def __init__(self, capacity, operation, neutral_element):
        assert capacity > 0 and capacity & (capacity - 1) == 0, \
            "capacity must be positive and a power of 2."
        self._capacity = capacity
        self._operation = operation
        # Flat tree storage: internal nodes occupy [1, capacity), leaves
        # occupy [capacity, 2 * capacity); slot 0 is unused.
        self._value = [neutral_element] * (2 * capacity)

    def _reduce_helper(self, start, end, node, node_start, node_end):
        # Reduce arr[start..end] (both inclusive) inside the subtree rooted
        # at `node`, which covers arr[node_start..node_end].
        if (start, end) == (node_start, node_end):
            return self._value[node]
        mid = (node_start + node_end) // 2
        if end <= mid:
            # query lies entirely in the left child
            return self._reduce_helper(start, end, 2 * node, node_start, mid)
        if start > mid:
            # query lies entirely in the right child
            return self._reduce_helper(start, end, 2 * node + 1, mid + 1, node_end)
        # query straddles both children: combine the two halves
        left = self._reduce_helper(start, mid, 2 * node, node_start, mid)
        right = self._reduce_helper(mid + 1, end, 2 * node + 1, mid + 1, node_end)
        return self._operation(left, right)

    def reduce(self, start=0, end=None):
        """Return operation(arr[start], ..., arr[end - 1]).

        The range is half-open: ``end`` is exclusive, defaults to
        ``capacity``, and may be negative (counted from the end).

        Parameters
        ----------
        start: int
            beginning of the subsequence
        end: int
            end (exclusive) of the subsequence

        Returns
        -------
        reduced: obj
            result of reducing self._operation over the requested range.
        """
        if end is None:
            end = self._capacity
        if end < 0:
            end += self._capacity
        return self._reduce_helper(start, end - 1, 1, 0, self._capacity - 1)

    def __setitem__(self, idx, val):
        # Write the leaf, then recompute every ancestor bottom-up.
        node = idx + self._capacity
        self._value[node] = val
        node //= 2
        while node:
            self._value[node] = self._operation(
                self._value[2 * node],
                self._value[2 * node + 1]
            )
            node //= 2

    def __getitem__(self, idx):
        assert 0 <= idx < self._capacity
        return self._value[self._capacity + idx]
85 |
86 |
class SumSegmentTree(SegmentTree):
    """Segment tree specialised to addition (supports prefix-sum search)."""

    def __init__(self, capacity):
        super(SumSegmentTree, self).__init__(
            capacity=capacity,
            operation=operator.add,
            neutral_element=0.0
        )

    def sum(self, start=0, end=None):
        """Return arr[start] + ... + arr[end - 1] (``end`` exclusive)."""
        return super(SumSegmentTree, self).reduce(start, end)

    def find_prefixsum_idx(self, prefixsum):
        """Find the highest index ``i`` such that
        arr[0] + arr[1] + ... + arr[i - 1] <= prefixsum.

        If the array values are probabilities, this samples indexes
        according to the discrete distribution in O(log capacity).

        Parameters
        ----------
        prefixsum: float
            upper bound on the prefix sum (0 <= prefixsum <= total sum)

        Returns
        -------
        idx: int
            highest index satisfying the prefix-sum constraint
        """
        assert 0 <= prefixsum <= self.sum() + 1e-5
        node = 1
        # Walk down from the root; at each step go left when the left
        # subtree's mass exceeds the remaining prefixsum, otherwise
        # subtract that mass and go right.
        while node < self._capacity:
            left = 2 * node
            if self._value[left] > prefixsum:
                node = left
            else:
                prefixsum -= self._value[left]
                node = left + 1
        return node - self._capacity
123 |
124 |
class MinSegmentTree(SegmentTree):
    """Segment tree specialised to the minimum operation."""

    def __init__(self, capacity):
        super(MinSegmentTree, self).__init__(
            capacity=capacity,
            operation=min,
            neutral_element=float('inf')
        )

    def min(self, start=0, end=None):
        """Return min(arr[start], ..., arr[end - 1]) (``end`` exclusive)."""
        return super(MinSegmentTree, self).reduce(start, end)
137 |
--------------------------------------------------------------------------------
/RL/rnn_models.py:
--------------------------------------------------------------------------------
1 | from collections import namedtuple
2 | import find_mxnet
3 | import mxnet as mx
4 |
# Learnable parameters of one LSTM layer: input->hidden and hidden->hidden
# affine transforms (shared across all time steps of that layer).
LSTMParam = namedtuple("LSTMParam", ["i2h_weight", "i2h_bias",
                                     "h2h_weight", "h2h_bias"])
7 |
def get_lstm_cell(i):
    """Create the shared weight/bias Variables for LSTM layer ``i``."""
    fields = ["i2h_weight", "i2h_bias", "h2h_weight", "h2h_bias"]
    return LSTMParam(**{f: mx.sym.Variable("l%d_%s" % (i, f)) for f in fields})
13 |
def lstm(num_hidden, indata, prev_h, prev_c, param, seqidx, layeridx, dropout=0.):
    """One LSTM step: (input, prev hidden, prev cell) -> (next hidden, next cell).

    ``param`` holds the layer's shared weights; ``seqidx``/``layeridx``
    only influence the generated symbol names.
    """
    prefix = "t%d_l%d" % (seqidx, layeridx)
    if dropout > 0.:
        indata = mx.sym.Dropout(data=indata, p=dropout)
    # Input and recurrent projections, each emitting the four stacked
    # gate pre-activations (hence num_hidden * 4).
    i2h = mx.sym.FullyConnected(data=indata,
                                weight=param.i2h_weight,
                                bias=param.i2h_bias,
                                num_hidden=num_hidden * 4,
                                name=prefix + "_i2h")
    h2h = mx.sym.FullyConnected(data=prev_h,
                                weight=param.h2h_weight,
                                bias=param.h2h_bias,
                                num_hidden=num_hidden * 4,
                                name=prefix + "_h2h")
    pre_act = mx.sym.SliceChannel(i2h + h2h, num_outputs=4,
                                  name=prefix + "_slice")
    in_gate = mx.sym.Activation(pre_act[0], act_type="sigmoid")
    in_transform = mx.sym.Activation(pre_act[1], act_type="tanh")
    forget_gate = mx.sym.Activation(pre_act[2], act_type="sigmoid")
    out_gate = mx.sym.Activation(pre_act[3], act_type="sigmoid")
    # Standard LSTM cell/state update.
    next_c = (forget_gate * prev_c) + (in_gate * in_transform)
    next_h = out_gate * mx.sym.Activation(next_c, act_type="tanh")
    return next_h, next_c
38 |
39 |
# Learnable parameters of one GRU layer: the fused update/reset gate
# transforms ("gates") and the candidate-state transforms ("trans").
GRUParam = namedtuple("GRUParam", ["gates_i2h_weight", "gates_i2h_bias",
                                   "gates_h2h_weight", "gates_h2h_bias",
                                   "trans_i2h_weight", "trans_i2h_bias",
                                   "trans_h2h_weight", "trans_h2h_bias"])
44 |
def get_gru_cell(i):
    """Create the shared weight/bias Variables for GRU layer ``i``."""
    def var(part):
        # Variable names use the historical "i2h_gates" ordering even though
        # the namedtuple fields are spelled "gates_i2h".
        return mx.sym.Variable("l%d_%s" % (i, part))
    return GRUParam(gates_i2h_weight=var("i2h_gates_weight"),
                    gates_i2h_bias=var("i2h_gates_bias"),
                    gates_h2h_weight=var("h2h_gates_weight"),
                    gates_h2h_bias=var("h2h_gates_bias"),
                    trans_i2h_weight=var("i2h_trans_weight"),
                    trans_i2h_bias=var("i2h_trans_bias"),
                    trans_h2h_weight=var("h2h_trans_weight"),
                    trans_h2h_bias=var("h2h_trans_bias"))
54 |
def gru(num_hidden, indata, prev_h, param, seqidx, layeridx, dropout=0.):
    """
    One GRU step: map (input, previous hidden state) -> next hidden state.

    Reference:
    * Chung, Junyoung, et al. "Empirical evaluation of gated recurrent neural
      networks on sequence modeling." arXiv preprint arXiv:1412.3555 (2014).

    ``param`` holds the layer's shared weights; ``seqidx``/``layeridx``
    only influence the generated symbol names.
    """
    if dropout > 0.:
        indata = mx.sym.Dropout(data=indata, p=dropout)
    # Fused update/reset gate pre-activations (num_hidden * 2).
    i2h = mx.sym.FullyConnected(data=indata,
                                weight=param.gates_i2h_weight,
                                bias=param.gates_i2h_bias,
                                num_hidden=num_hidden * 2,
                                name="t%d_l%d_gates_i2h" % (seqidx, layeridx))
    h2h = mx.sym.FullyConnected(data=prev_h,
                                weight=param.gates_h2h_weight,
                                bias=param.gates_h2h_bias,
                                num_hidden=num_hidden * 2,
                                name="t%d_l%d_gates_h2h" % (seqidx, layeridx))
    gates = i2h + h2h
    slice_gates = mx.sym.SliceChannel(gates, num_outputs=2,
                                      name="t%d_l%d_slice" % (seqidx, layeridx))
    update_gate = mx.sym.Activation(slice_gates[0], act_type="sigmoid")
    reset_gate = mx.sym.Activation(slice_gates[1], act_type="sigmoid")
    # Candidate state: input projection plus projection of the reset hidden.
    htrans_i2h = mx.sym.FullyConnected(data=indata,
                                       weight=param.trans_i2h_weight,
                                       bias=param.trans_i2h_bias,
                                       num_hidden=num_hidden,
                                       name="t%d_l%d_trans_i2h" % (seqidx, layeridx))
    h_after_reset = prev_h * reset_gate
    # BUGFIX: this symbol was previously named "..._trans_i2h", colliding
    # with the input projection above; it is the hidden-to-hidden transform.
    htrans_h2h = mx.sym.FullyConnected(data=h_after_reset,
                                       weight=param.trans_h2h_weight,
                                       bias=param.trans_h2h_bias,
                                       num_hidden=num_hidden,
                                       name="t%d_l%d_trans_h2h" % (seqidx, layeridx))
    h_trans = htrans_i2h + htrans_h2h
    h_trans_active = mx.sym.Activation(h_trans, act_type="tanh")
    # Linear interpolation between the previous state and the candidate.
    next_h = prev_h + update_gate * (h_trans_active - prev_h)
    return next_h
95 |
--------------------------------------------------------------------------------
/RL/symbol_inception-bn.py:
--------------------------------------------------------------------------------
1 | """
2 |
3 | Inception + BN, suitable for images with around 224 x 224
4 |
5 | Reference:
6 |
7 | Sergey Ioffe and Christian Szegedy. Batch normalization: Accelerating deep
8 | network training by reducing internal covariate shift. arXiv preprint
9 | arXiv:1502.03167, 2015.
10 |
11 | """
12 |
13 | import find_mxnet
14 | import mxnet as mx
15 |
16 | eps = 1e-10 + 1e-5
17 | bn_mom = 0.9
18 | #fix_gamma_flag = False
19 |
20 |
def ConvFactory(data, num_filter, kernel, stride=(1,1), pad=(0, 0), name=None, suffix=''):
    """Convolution -> BatchNorm -> ReLU building block.

    NOTE(review): reads the module globals ``fix_gamma_flag`` and
    ``global_stats_flag``, which are only assigned inside get_symbol;
    calling this factory before get_symbol raises NameError - confirm
    that is the intended usage.
    """
    tag = '%s%s' % (name, suffix)
    conv = mx.symbol.Convolution(data=data, num_filter=num_filter, kernel=kernel,
                                 stride=stride, pad=pad, name='conv_' + tag)
    bn = mx.symbol.BatchNorm(data=conv, fix_gamma=fix_gamma_flag, eps=eps,
                             momentum=bn_mom, use_global_stats=global_stats_flag,
                             name='bn_' + tag)
    return mx.symbol.Activation(data=bn, act_type='relu', name='relu_' + tag)
26 |
def InceptionFactoryA(data, num_1x1, num_3x3red, num_3x3, num_d3x3red, num_d3x3, pool, proj, name):
    """Inception unit: 1x1, 3x3, double-3x3 and pooled-projection branches."""
    # branch 1: plain 1x1 convolution
    branch1 = ConvFactory(data=data, num_filter=num_1x1, kernel=(1, 1), name=('%s_1x1' % name))
    # branch 2: 1x1 reduction followed by a 3x3 convolution
    reduce3 = ConvFactory(data=data, num_filter=num_3x3red, kernel=(1, 1), name=('%s_3x3' % name), suffix='_reduce')
    branch2 = ConvFactory(data=reduce3, num_filter=num_3x3, kernel=(3, 3), pad=(1, 1), name=('%s_3x3' % name))
    # branch 3: 1x1 reduction followed by two stacked 3x3 convolutions
    reduced3 = ConvFactory(data=data, num_filter=num_d3x3red, kernel=(1, 1), name=('%s_double_3x3' % name), suffix='_reduce')
    branch3 = ConvFactory(data=reduced3, num_filter=num_d3x3, kernel=(3, 3), pad=(1, 1), name=('%s_double_3x3_0' % name))
    branch3 = ConvFactory(data=branch3, num_filter=num_d3x3, kernel=(3, 3), pad=(1, 1), name=('%s_double_3x3_1' % name))
    # branch 4: pooling (avg or max, per `pool`) followed by a 1x1 projection
    pooled = mx.symbol.Pooling(data=data, kernel=(3, 3), stride=(1, 1), pad=(1, 1), pool_type=pool, name=('%s_pool_%s_pool' % (pool, name)))
    branch4 = ConvFactory(data=pooled, num_filter=proj, kernel=(1, 1), name=('%s_proj' % name))
    # channel-wise concatenation of all branches
    return mx.symbol.Concat(*[branch1, branch2, branch3, branch4], name='ch_concat_%s_chconcat' % name)
43 |
def InceptionFactoryB(data, num_3x3red, num_3x3, num_d3x3red, num_d3x3, name):
    """Downsampling inception unit: stride-2 conv branches plus max pooling."""
    # branch 1: 1x1 reduction, then a stride-2 3x3 convolution
    red1 = ConvFactory(data=data, num_filter=num_3x3red, kernel=(1, 1), name=('%s_3x3' % name), suffix='_reduce')
    branch1 = ConvFactory(data=red1, num_filter=num_3x3, kernel=(3, 3), pad=(1, 1), stride=(2, 2), name=('%s_3x3' % name))
    # branch 2: 1x1 reduction, then two 3x3 convolutions (second one strided)
    red2 = ConvFactory(data=data, num_filter=num_d3x3red, kernel=(1, 1), name=('%s_double_3x3' % name), suffix='_reduce')
    branch2 = ConvFactory(data=red2, num_filter=num_d3x3, kernel=(3, 3), pad=(1, 1), stride=(1, 1), name=('%s_double_3x3_0' % name))
    branch2 = ConvFactory(data=branch2, num_filter=num_d3x3, kernel=(3, 3), pad=(1, 1), stride=(2, 2), name=('%s_double_3x3_1' % name))
    # branch 3: stride-2 max pooling
    pooled = mx.symbol.Pooling(data=data, kernel=(3, 3), stride=(2, 2), pad=(1, 1), pool_type="max", name=('max_pool_%s_pool' % name))
    # channel-wise concatenation of all branches
    return mx.symbol.Concat(*[branch1, branch2, pooled], name='ch_concat_%s_chconcat' % name)
57 |
def get_symbol(data, num_classes=1000, fix_gamma=False, global_stats=False):
    """Build the Inception-BN trunk on top of `data` and return the last
    inception block's output symbol (pooling/classifier tail is commented
    out, so `num_classes` is currently unused).

    The BatchNorm flags are published as module globals because
    ConvFactory has no direct access to these arguments.
    """
    global fix_gamma_flag, global_stats_flag
    fix_gamma_flag, global_stats_flag = fix_gamma, global_stats
    # data
    #data = mx.symbol.Variable(name="data")
    # stage 1
    conv1 = ConvFactory(data=data, num_filter=64, kernel=(7, 7), stride=(2, 2), pad=(3, 3), name='1')
    pool1 = mx.symbol.Pooling(data=conv1, kernel=(3, 3), stride=(2, 2), name='pool_1', pool_type='max')
    # stage 2
    conv2red = ConvFactory(data=pool1, num_filter=64, kernel=(1, 1), stride=(1, 1), name='2_red')
    conv2 = ConvFactory(data=conv2red, num_filter=192, kernel=(3, 3), stride=(1, 1), pad=(1, 1), name='2')
    pool2 = mx.symbol.Pooling(data=conv2, kernel=(3, 3), stride=(2, 2), name='pool_2', pool_type='max')
    # BlockGrad freezes stages 1-2: no gradients flow below this point.
    pool2 = mx.symbol.BlockGrad(pool2, name='block_stage2')
    # stage 3: inception blocks 3a-3c (3c downsamples)
    in3a = InceptionFactoryA(pool2, 64, 64, 64, 64, 96, "avg", 32, '3a')
    in3b = InceptionFactoryA(in3a, 64, 64, 96, 64, 96, "avg", 64, '3b')
    in3c = InceptionFactoryB(in3b, 128, 160, 64, 96, '3c')
    # stage 4: inception blocks 4a-4e (4e downsamples)
    in4a = InceptionFactoryA(in3c, 224, 64, 96, 96, 128, "avg", 128, '4a')
    in4b = InceptionFactoryA(in4a, 192, 96, 128, 96, 128, "avg", 128, '4b')
    in4c = InceptionFactoryA(in4b, 160, 128, 160, 128, 160, "avg", 128, '4c')
    in4d = InceptionFactoryA(in4c, 96, 128, 192, 160, 192, "avg", 128, '4d')
    in4e = InceptionFactoryB(in4d, 128, 192, 192, 256, '4e')
    # stage 5: inception blocks 5a-5b
    in5a = InceptionFactoryA(in4e, 352, 192, 320, 160, 224, "avg", 128, '5a')
    in5b = InceptionFactoryA(in5a, 352, 192, 320, 192, 224, "max", 128, '5b')
    #in5b = mx.symbol.BlockGrad(in5b, name='block_in5b')
    # global avg pooling
    #avg = mx.symbol.Pooling(data=in5b, kernel=(7, 7), stride=(1, 1), name="global_pool", pool_type='avg')
    # linear classifier
    #flatten = mx.symbol.Flatten(data=avg, name='flatten')
    #fc1 = mx.symbol.FullyConnected(data=flatten, num_hidden=num_classes, name='fc1')
    #softmax = mx.symbol.SoftmaxOutput(data=fc1, name='softmax')
    return in5b
92 |
93 |
if __name__ == '__main__':
    # Smoke test: build the symbol and print a layer summary.
    # BUGFIX: get_symbol requires the input symbol as its first argument;
    # the original called get_symbol() with no arguments, which raises
    # TypeError before anything is printed.
    sym = get_symbol(mx.symbol.Variable(name='data'))
    mx.viz.print_summary(sym, {'data': (1, 3, 128, 64)})
97 |
--------------------------------------------------------------------------------
/baseline/even_iterator.py:
--------------------------------------------------------------------------------
1 | import mxnet as mx
2 | import numpy as np
3 | import cv2
4 | import random
5 |
6 |
7 | class Even_iterator(mx.io.DataIter):
8 | '''
9 | data iterator, shuffle data but always make pairs as neighbors
10 | for verification and triplet loss
11 | '''
12 | def __init__(self, lst_name, batch_size, aug_params=dict(), shuffle=False):
13 | super(Even_iterator, self).__init__()
14 | self.batch_size = batch_size
15 | self.aug_params = aug_params.copy()
16 | self.shuffle = shuffle
17 |
18 | self.data, self.labels = Even_iterator.load_data(lst_name)
19 | print "load data over"
20 | self.data_num = self.labels.shape[0]
21 | self.label_num = 1 if len(self.labels.shape) == 1 else self.labels.shape[1]
22 | print self.data_num, self.label_num
23 | self.reset()
24 |
25 | @staticmethod
26 | def load_data(lst_name):
27 | img_lst = [x.strip().split('\t')
28 | for x in file(lst_name).read().splitlines()]
29 | im = cv2.imread(img_lst[0][-1])
30 | h, w = im.shape[:2]
31 | n, m = len(img_lst), len(img_lst[0]) - 2
32 | data = np.zeros((n, h, w, 3), dtype=np.uint8)
33 | labels = np.zeros((n, m), dtype=np.int32) if m > 1 else np.zeros((n, ), dtype=np.int32)
34 |
35 | for i in range(len(img_lst)):
36 | im = cv2.imread(img_lst[i][-1])
37 |
38 | data[i] = im
39 | labels[i] = img_lst[i][1:-1] if m > 1 else img_lst[i][1]
40 |
41 | return data, labels
42 |
43 | @staticmethod
44 | def even_shuffle(labels):
45 | '''
46 | shuffle images lists and make pairs
47 | '''
48 | s = [(x, int(random.random() * 1e5), i) for i, x in enumerate(labels)]
49 | s = sorted(s, key=lambda x: (x[0], x[1]))
50 | lst = [x[2] for x in s]
51 |
52 | idx = range(0, len(lst), 2)
53 | random.shuffle(idx)
54 | ret = []
55 | for i in idx:
56 | ret.append(lst[i])
57 | ret.append(lst[i + 1])
58 |
59 | return ret
60 |
61 | @staticmethod
62 | def model_shuffle(labels):
63 | '''
64 | shuffle images and images with same model are grouped together
65 | '''
66 | models_idx = range(int(np.max(labels)) + 1)
67 | random.shuffle(models_idx)
68 | s = [(models_idx[x], int(random.random() * 1e5), i) for i, x in enumerate(labels)]
69 | s = sorted(s, key=lambda x: (x[0], x[1]))
70 | lst = [x[2] for x in s]
71 |
72 | return lst
73 |
74 | def reset(self):
75 | self.current = 0
76 | if self.shuffle:
77 | idx = Even_iterator.even_shuffle(self.labels)
78 | # idx = Even_iterator.model_shuffle(self.labels)
79 | self.data = self.data[idx]
80 | self.labels = self.labels[idx]
81 |
82 | @property
83 | def provide_data(self):
84 | shape = self.aug_params['input_shape']
85 |
86 | return [('data', (self.batch_size, shape[0], shape[1], shape[2]))]
87 |
88 | @property
89 | def provide_label(self):
90 | return [('softmax_label', (self.batch_size, self.label_num))]
91 |
92 | @staticmethod
93 | def augment(im, aug_params):
94 | '''
95 | augmentation (resize, crop, mirror)
96 | '''
97 | crop_h, crop_w = aug_params['input_shape'][1:]
98 | ori_h, ori_w = im.shape[:2]
99 | resize = aug_params['resize']
100 | if ori_h < ori_w:
101 | h, w = resize, int(float(resize) / ori_h * ori_w)
102 | else:
103 | h, w = int(float(resize) / ori_w * ori_h), resize
104 |
105 | if h != ori_h:
106 | im = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR)
107 |
108 | x, y = (w - crop_w) / 2, (h - crop_h) / 2
109 | if aug_params['rand_crop']:
110 | x = random.randint(0, w - crop_w)
111 | y = random.randint(0, h - crop_h)
112 | im = im[y:y + crop_h, x:x + crop_w, :]
113 |
114 | # cv2.imshow("name", im.astype(np.uint8))
115 | # cv2.waitKey()
116 |
117 | im = np.transpose(im, (2, 0, 1))
118 | newim = np.zeros_like(im)
119 | newim[0] = im[2]
120 | newim[1] = im[1]
121 | newim[2] = im[0]
122 |
123 | if aug_params['rand_mirror'] and random.randint(0, 1) == 1:
124 | newim = newim[:, :, ::-1]
125 |
126 | return newim
127 |
128 | def next(self):
129 | if self.current + self.batch_size > self.data_num:
130 | raise StopIteration
131 |
132 | shape = self.aug_params['input_shape']
133 | x = np.zeros((self.batch_size, shape[0], shape[1], shape[2]))
134 | y = np.zeros((self.batch_size, self.label_num) if self.label_num > 1
135 | else (self.batch_size, ))
136 | index = []
137 | for i in range(self.current, self.current + self.batch_size):
138 | im = self.data[i]
139 | im.astype(np.float32)
140 | im = Even_iterator.augment(im, self.aug_params)
141 | x[i - self.current] = im
142 | y[i - self.current] = self.labels[i]
143 | index.append(i)
144 |
145 | x -= self.aug_params['mean']
146 |
147 | x = mx.nd.array(x)
148 | label = mx.nd.array(y)
149 |
150 | batch = mx.io.DataBatch(data=[x], label=[label], pad=0, index=index)
151 | self.current += self.batch_size
152 |
153 | return batch
154 |
--------------------------------------------------------------------------------
/RL/mars_test.py:
--------------------------------------------------------------------------------
1 | import sys
2 | #sys.path.insert(0, "mxnet/python/")
3 | import find_mxnet
4 | import mxnet as mx
5 | import numpy as np
6 | import argparse
7 | import random
8 |
9 |
10 | from symbols import sym_base_net, sym_DQN
11 | from utils import get_imRecordIter, load_checkpoint
12 |
13 |
# CMC rank thresholds to report (rank-1/5/10/20) and how many there are.
cmcs = [1, 5, 10, 20]
cn = 4
16 |
17 |
def norm_cnts(cnts, cnt):
    """Turn the raw CMC hit counters into fractions of processed queries."""
    total = cnt[0]
    return [cnts[i] / total for i in xrange(cn)]
20 |
21 |
def update_cnts(cur, cnts, cnt):
    """Credit every CMC threshold above rank `cur` and bump the query total."""
    for j in xrange(cn):
        if cmcs[j] > cur:
            cnts[j] += 1.0
    cnt[0] += 1.0
27 |
28 |
def parse_args():
    """Command-line options for the MARS DQN evaluation script."""
    ap = argparse.ArgumentParser(
        description='single domain car recog training')
    ap.add_argument('--gpus', type=str, default='0',
                    help='the gpus will be used, e.g "0,1"')
    ap.add_argument('--model-load-epoch', type=int, default=3,
                    help='load the model on an epoch using the model-load-prefix')
    ap.add_argument('--model-load-prefix', type=str, default='mars-TEST-DQN_test-2017.11.15-10.51.56-bs4-ss8-incp_prep__nobg_noregQv_block2_f2_nofus0-2_poscontra_fne0.1-1-1_tisr1-sgd_t500-_qg0.9-up0.2-vtd4.0-_lr1e1-_32-1024-_na3-3',
                    help='load model prefix')
    return ap.parse_args()
39 |
40 |
def create_module(ctx, seq_len, is_train=False):
    """Build and bind a DQN module operating on `seq_len` feature pairs.

    NOTE(review): `fixed_param_names=['data1', 'data2']` lists the data
    symbols rather than parameters - confirm this is intentional.
    """
    q_net = sym_DQN(args, is_train=is_train, num_acts=args.num_acts,
                    bn=False, global_stats=False, no_his=True)
    module = mx.mod.Module(symbol=q_net, data_names=('data1', 'data2'),
                           label_names=None,
                           fixed_param_names=['data1', 'data2'], context=ctx)
    module.bind(data_shapes=[('data1', (seq_len, 1024)), ('data2', (seq_len, 1024)),],
                for_training=is_train, inputs_need_grad=False)
    return module
48 |
49 |
def dist(a, b):
    """Squared Euclidean distance between two NDArray feature vectors."""
    delta = a - b
    return mx.nd.sum(delta * delta).asnumpy()[0]
53 |
54 |
def copyto(x):
    """Return a fresh copy of NDArray `x` on its own device context."""
    return x.copyto(x.context)
57 |
58 |
def get_train_args(name):
    """Recover the training Namespace recorded on the first line of log/<name>.log.

    The first log line holds, after a 10-character prefix, the repr of an
    argparse.Namespace; it is re-evaluated here to rebuild the training
    options. Raises IOError if the log file is missing.
    """
    # BUGFIX: use a with-block so the file is closed even if readline fails.
    with open('log/%s.log' % name) as fn:
        s = fn.readline()[10:]
    # NOTE(review): exec on file contents is unsafe for untrusted logs;
    # kept because the log format is repr(argparse.Namespace(...)).
    # Run in an explicit namespace so the result is retrievable under both
    # Python 2 and 3 (exec cannot rebind function locals in Python 3).
    ns = {'argparse': argparse}
    exec('ret=argparse.' + s, ns)
    return ns['ret']
66 |
# ---- script body: load models and extract per-frame base features ----
test_args = parse_args()
print 'test arg:', test_args
devices = [mx.gpu(int(i)) for i in test_args.gpus.split(',')]

model_path = 'models'

# Recover the full training-time option Namespace from the training log.
args = get_train_args(test_args.model_load_prefix)
print 'train arg:', args

model_path = 'models'  # NOTE(review): redundant reassignment of the same value
arg_params, aux_params = load_checkpoint(
    '%s/%s' % (model_path, test_args.model_load_prefix), test_args.model_load_epoch)
# Base CNN that maps raw frames to feature vectors (batches of 1024 frames).
base_mod = mx.mod.Module(symbol=sym_base_net(args.network, is_test=True), data_names=('data',), label_names=None, context=devices)
base_mod.bind(data_shapes=[('data', (1024, 3, 224, 112))], for_training=False)
base_mod.init_params(initializer=None, arg_params=arg_params,aux_params=aux_params,force_init=True)

dataiter = get_imRecordIter(
    args, 'recs/eval_test', (3, 224, 112), 1024,
    shuffle=False, aug=False, even_iter=True)
dataiter.reset()
# F: one feature row per test frame, as predicted by the base network.
F = base_mod.predict(dataiter)
del dataiter
print 'l2'
print F

print 'base feat predicted'
# Evaluation metadata; presumably query rows are (id, cam, tracklet idx)
# and gallery rows carry frame ranges plus (id, cam) - verify against the
# MARS evaluation kit.
query = np.loadtxt('/data3/matt/MARS/MARS-evaluation/info/query.csv', delimiter=',').astype(int)
gallery = np.loadtxt('/data3/matt/MARS/MARS-evaluation/info/gallery.csv', delimiter=',').astype(int)

cnts, cnt = [0, 0, 0, 0], [0]
max_turn, tot_ava = args.sample_size, 1

# shift the 1-based CSV indices to 0-based
query[:, 2] -= 1
gallery[:, 0] -= 1
101 |
102 |
103 |
def get_data(a):
    """Sample one frame feature uniformly from gallery tracklet `a`."""
    lo, hi = gallery[a, 0], gallery[a, 1]
    return F[random.randrange(lo, hi)]
106 |
# Pre-sample `sample_size` frame features per usable gallery tracklet.
P = mx.nd.zeros((gallery.shape[0], args.sample_size, F.shape[1]),ctx=devices[0])
for a in xrange(gallery.shape[0]):
    j, camj = gallery[a, 2:]
    # skip junk (-1) tracklets and tracklets with an empty frame range
    if j == -1 or gallery[a, 0] == gallery[a, 1]:
        continue
    cur = mx.nd.zeros((args.sample_size, F.shape[1]),ctx=devices[0])
    for k in xrange(args.sample_size):
        cur[k] = get_data(a)
    P[a] = copyto(cur)
116 |
# Build the Q-network that compares a query tracklet against a gallery one.
data1 = mx.symbol.Variable(name="data1")
data2 = mx.symbol.Variable(name="data2")
Qsym = sym_DQN(data1, data2, args.num_sim, args.num_hidden, is_train=False, num_acts=args.num_acts, min_states=args.min_states, min_imgs=args.min_imgs, fusion=args.fusion, bn=args.q_bn, global_stats=False, no_his=False)
Q = mx.mod.Module(symbol=Qsym, data_names=('data1', 'data2'), label_names=None, context=devices[0])
Q.bind(data_shapes=[('data1', (args.sample_size, F.shape[1])),
                    ('data2', (args.sample_size, F.shape[1]))],
       for_training=False)
Q.init_params(initializer=None, arg_params=arg_params,aux_params=aux_params,force_init=True, allow_missing=False)
cnts, cnt = [0, 0, 0, 0], [0]
pc, ps = 0, 0  # pc: decisions made, ps: total frames consumed before deciding
hists = [0 for _ in xrange(args.sample_size)]  # histogram of decision steps
for q in xrange(query.shape[0]):
    i, cam, idx = query[q]
    if gallery[idx, 0] == gallery[idx, 1]:  # query tracklet has no frames
        continue
    d = []
    for a in xrange(gallery.shape[0]):
        j, camj = gallery[a, 2:]
        # skip same-id/same-camera matches, junk ids and empty tracklets
        if j == i and camj == cam or j == -1 or gallery[a, 0] == gallery[a, 1]:
            continue
        # randomly subsample ~1% of the gallery to keep evaluation tractable
        if random.random() > 0.01:
            continue
        Q.forward(mx.io.DataBatch([P[idx], P[a]], []), is_train=False)
        Qvalues = Q.get_outputs()[0].asnumpy()
        # Step through frames until column 2 stops dominating columns 0/1
        # or the frame budget runs out (assumes column 2 is the
        # "request another frame" action - TODO confirm against sym_DQN).
        for k in xrange(args.sample_size):
            if Qvalues[k, 2] < Qvalues[k, 0] or Qvalues[k, 2] < Qvalues[k, 1] or k == args.sample_size - 1:
                d.append((Qvalues[k, 0] - Qvalues[k, 1], j))
                ps += k + 1
                pc += 1
                hists[k] += 1
                break

    # Rank candidates by the (col0 - col1) Q-value margin and locate the
    # true identity's position to update the CMC counters.
    d = sorted(d)
    cur = 0
    for a in xrange(len(d)):
        j = d[a][1]
        if j == i:
            break
        else:
            cur += 1
    update_cnts(cur, cnts, cnt)
    print q, i, cam, idx, cur, norm_cnts(cnts, cnt), ps * 1.0 / pc
--------------------------------------------------------------------------------
/baseline/baseline_test.py:
--------------------------------------------------------------------------------
1 | import sys
2 | #sys.path.insert(0, "mxnet/python/")
3 | import find_mxnet
4 | import mxnet as mx
5 | import numpy as np
6 | import argparse
7 | import random
8 | import importlib
9 |
10 | from utils import get_imRecordIter, load_checkpoint
11 | from sklearn.metrics import average_precision_score
12 |
13 |
# CMC rank thresholds to report (rank-1/5/10/20) and how many there are.
cmcs = [1, 5, 10, 20]
cn = 4
16 |
17 |
def norm_cnts(cnts, cnt):
    """Normalise each CMC counter by the total number of ranked queries."""
    denom = cnt[0]
    return [cnts[j] / denom for j in xrange(cn)]
20 |
21 |
def update_cnts(d, cnts, cnt, N, i):
    """Rank gallery item `i` within distance list `d` and update CMC counters.

    `cur` is the number of distractors at distance <= d[i]; each CMC
    threshold larger than `cur` gets a hit and the query total is bumped.
    Returns the list of gallery indices ranked at or above the true match.
    """
    # BUGFIX: removed the unused `dcur = d[i]` assignment and the
    # commented-out np.argsort dead code from the original.
    cur, pre = 0, []
    for j in xrange(N):
        # count every distractor at least as close as the true match
        if d[j] <= d[i] and not j == i:
            cur += 1
            pre.append(j)
    for j in xrange(cn):
        if cur < cmcs[j]:
            cnts[j] += 1.0
    cnt[0] += 1.0
    return pre
35 |
36 |
def parse_args():
    """Command-line options for the baseline re-id evaluation script."""
    ap = argparse.ArgumentParser(
        description='single domain car recog training')
    ap.add_argument('--gpus', type=str, default='6',
                    help='the gpus will be used, e.g "0,1"')
    ap.add_argument('--data-dir', type=str,
                    default='/data3/matt/iLIDS-VID/recs',
                    help='data directory')
    ap.add_argument('--num-examples', type=int, default=10000,
                    help='the number of training examples')
    ap.add_argument('--num-id', type=int, default=100,
                    help='the number of training ids')
    ap.add_argument('--batch-size', type=int, default=512,
                    help='the batch size')
    ap.add_argument('--base-model-load-epoch', type=int, default=1,
                    help='load the model on an epoch using the model-load-prefix')
    ap.add_argument('--base-model-load-prefix', type=str, default='ilds_baseline',
                    help='load model prefix')
    ap.add_argument('--dataset', type=str, default='image_test',
                    help='dataset (test/query)')
    ap.add_argument('--network', type=str,
                    default='inception-bn', help='network name')
    return ap.parse_args()
61 |
62 |
def build_base_net(args, is_train=False, global_stats=False):
    '''
    Backbone network: dynamically imported symbol -> global average
    pooling -> flatten -> L2-normalised feature vector.
    '''
    backbone = importlib.import_module('symbol_' + args.network).get_symbol()
    pooled = mx.symbol.Pooling(
        data=backbone, kernel=(1, 1), global_pool=True,
        pool_type='avg', name='global_pool')
    flat = mx.symbol.Flatten(data=pooled, name='flatten')
    return mx.symbol.L2Normalization(data=flat, name='l2_norm')
76 |
# ---- script entry: parse options and set up compute devices ----
args = parse_args()
print args
devices = [mx.gpu(int(i)) for i in args.gpus.split(',')]
batch_size = args.batch_size
model_path = 'models'
82 |
def dist(a, b):
    """Squared Euclidean distance between two NDArray feature vectors."""
    gap = a - b
    return mx.nd.sum(gap * gap).asnumpy()[0]
86 |
def copyto(x):
    """Return a fresh copy of NDArray `x` on the context it already lives on."""
    return x.copyto(x.context)
89 |
# ---- main evaluation over the 10 standard train/test splits ----
cnts_g, cnt_g = [0, 0, 0, 0], [0]   # cross-split CMC counters
max_turn, gc = 10, 0
max_frames = max_turn
cmc1 = np.zeros(max_frames)  # rank-1 hits per frame budget 1..max_frames
MAP = np.zeros(max_frames)   # summed average precision per frame budget
results = []
for sets in xrange(10):
    arg_params, aux_params = load_checkpoint(
        '%s/%s_%d' % (model_path, args.base_model_load_prefix, sets), args.base_model_load_epoch)
    base_mod = mx.mod.Module(symbol=build_base_net(args), data_names=('data', ), label_names=None, context=devices)
    base_mod.bind(data_shapes=[('data', (args.batch_size, 3, 224, 112))], for_training=False)
    base_mod.init_params(initializer=None, arg_params=arg_params,aux_params=aux_params,force_init=True)

    dataiter = get_imRecordIter(
        args, '%s%d' % (args.dataset, sets), (3, 224, 112), args.batch_size,
        shuffle=False, aug=False, even_iter=False)

    dataiter.reset()

    # Per-frame feature matrix for this split.
    output = base_mod.predict(dataiter)
    F = output
    F2 = F
    print F.shape

    # cnt_lst[i]..cnt_lst[i+1] delimit the frames of sequence i;
    # presumably the first N sequences are one camera and the next N the
    # other - verify against the list-generation scripts.
    cnt_lst = np.loadtxt(args.data_dir + '/' + 'image_test' + str(sets) + '.txt').astype(int)
    N = cnt_lst.shape[0] / 2

    # avp[i]: L2-normalised average feature over all frames of sequence i.
    avp = []
    for i in xrange(N + N):
        for j in xrange(cnt_lst[i], cnt_lst[i + 1]):
            if j == cnt_lst[i]:
                g = copyto(F[j])
            else:
                g += F[j]
        avp.append(g / mx.nd.sqrt(mx.nd.sum(g * g)))

    cnts, cnt = [0, 0, 0, 0], [0]

    for i in xrange(N+N):
        d = []
        a = i % N           # index of the true match in the opposite camera
        scores = []
        label = np.array([(1 if _ == a else 0) for _ in xrange(N)])
        for j in xrange(N):
            # full-sequence distance used for the CMC ranking
            d.append(dist(avp[i], avp[j if i >= N else (j + N)]))
            g, x, y = [], mx.nd.zeros((int(F.shape[1])),ctx=devices[0]), mx.nd.zeros((int(F.shape[1])),ctx=devices[0])
            # distances using running averages of 1..max_frames random frames
            for k in xrange(max_frames):
                if i < N:
                    x += F2[random.randrange(cnt_lst[i], cnt_lst[i+1])]
                    y += F2[random.randrange(cnt_lst[j+N], cnt_lst[j+1+N])]
                else:
                    x += F2[random.randrange(cnt_lst[i], cnt_lst[i+1])]
                    y += F2[random.randrange(cnt_lst[j], cnt_lst[j+1])]
                g.append(dist(x/(k+1), y/(k+1)))
            scores.append(g)
        scores = np.array(scores)
        for j in xrange(max_frames):
            MAP[j] += average_precision_score(label, -scores[:, j])
            if min(scores[:, j]) == scores[a, j]:
                cmc1[j] += 1
        gc += 1
        print gc
        print MAP[:10] / gc
        print cmc1[:10] / gc
        update_cnts(d, cnts, cnt, N, i if i < N else i - N)
        update_cnts(d, cnts_g, cnt_g, N, i if i < N else i - N)
        print i, norm_cnts(cnts, cnt), norm_cnts(cnts_g, cnt_g)
    results.append((cnts, cnt))
--------------------------------------------------------------------------------
/RL/img_lib.py:
--------------------------------------------------------------------------------
1 | import mxnet as mx
2 | import numpy as np
3 | import cv2
4 | import random
5 |
6 |
class ImgLibrary(mx.io.DataIter):
    '''
    MXNet DataIter that loads images lazily from disk.

    Expects a tab-separated list file where each row is
    ``index<TAB>label(s)<TAB>relative_image_path``; only paths are kept in
    memory, pixels are read with cv2 on demand.
    '''
    def __init__(self, lst_name, batch_size, aug_params=dict(), shuffle=False, data_dir=''):
        # aug_params is copied, so the shared mutable default dict is harmless here.
        super(ImgLibrary, self).__init__()
        self.batch_size = batch_size
        self.aug_params = aug_params.copy()
        self.shuffle = shuffle
        self.data_dir = data_dir

        self.data, self.labels = ImgLibrary.load_data(lst_name, data_dir)
        #print "load data over"
        self.data_num = self.labels.shape[0]
        # label_num == 1 for a single class label, otherwise number of label columns.
        self.label_num = 1 if len(self.labels.shape) == 1 else self.labels.shape[1]
        #print self.data_num, self.label_num
        self.reset()

    @staticmethod
    def load_data(lst_name, data_dir):
        # Parse the list file into (paths, labels). The first image is read only
        # to fail fast when paths are wrong (h, w are otherwise unused).
        img_lst = [x.strip().split('\t')
                   for x in file(lst_name).read().splitlines()]
        im = cv2.imread(data_dir + img_lst[0][-1])
        print data_dir + img_lst[0][-1]
        h, w = im.shape[:2]
        # m = number of label columns (row layout: index, labels..., path).
        n, m = len(img_lst), len(img_lst[0]) - 2
        data = []#np.zeros((n, h, w, 3), dtype=np.uint8)
        labels = np.zeros((n, m), dtype=np.int32) if m > 1 else np.zeros((n, ), dtype=np.int32)

        for i in range(len(img_lst)):
            #im = cv2.imread(data_dir + img_lst[i][-1])

            #data[i] = im
            data.append(data_dir + img_lst[i][-1])
            labels[i] = img_lst[i][1:-1] if m > 1 else img_lst[i][1]

        data = np.array(data)

        return data, labels

    @staticmethod
    def even_shuffle(labels):
        '''
        shuffle images lists and make pairs
        '''
        # Sort by (label, random key) so same-label images are adjacent in a
        # random order, then shuffle the adjacent pairs.
        # NOTE(review): assumes len(labels) is even — lst[i + 1] would raise
        # IndexError for an odd-length list; confirm against callers.
        s = [(x, int(random.random() * 1e5), i) for i, x in enumerate(labels)]
        s = sorted(s, key=lambda x: (x[0], x[1]))
        lst = [x[2] for x in s]

        idx = range(0, len(lst), 2)
        random.shuffle(idx)
        ret = []
        for i in idx:
            ret.append(lst[i])
            ret.append(lst[i + 1])

        return ret

    @staticmethod
    def model_shuffle(labels):
        '''
        shuffle images and images with same model are grouped together
        '''
        # Assign each label a random group rank, then order images by
        # (group rank, random key): groups are randomized, members randomized.
        models_idx = range(int(np.max(labels)) + 1)
        random.shuffle(models_idx)
        s = [(models_idx[x], int(random.random() * 1e5), i) for i, x in enumerate(labels)]
        s = sorted(s, key=lambda x: (x[0], x[1]))
        lst = [x[2] for x in s]

        return lst

    def reset(self):
        # Restart the epoch; optionally reshuffle (grouped-by-identity order).
        self.current = 0
        if self.shuffle:
            #idx = ImgLibrary.even_shuffle(self.labels)
            idx = ImgLibrary.model_shuffle(self.labels)
            self.data = self.data[idx]
            self.labels = self.labels[idx]

    @property
    def provide_data(self):
        # input_shape is (channels, height, width).
        shape = self.aug_params['input_shape']

        return [('data', (self.batch_size, shape[0], shape[1], shape[2]))]

    @property
    def provide_label(self):
        # NOTE(review): reports shape (batch, label_num) even when label_num
        # is 1, while next() builds a (batch,) label array in that case —
        # confirm consumers tolerate the mismatch.
        return [('softmax_label', (self.batch_size, self.label_num))]

    @staticmethod
    def augment(im, aug_params, aug=False):
        '''
        augmentation (resize, crop, mirror)
        '''
        # Resize so the shorter side equals aug_params['resize'], keeping aspect.
        crop_h, crop_w = aug_params['input_shape'][1:]
        ori_h, ori_w = im.shape[:2]
        resize = aug_params['resize']
        if ori_h < ori_w:
            h, w = resize, int(float(resize) / ori_h * ori_w)
        else:
            h, w = int(float(resize) / ori_w * ori_h), resize

        if h != ori_h:
            im = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR)

        # Center crop by default; random crop only when augmenting.
        x, y = (w - crop_w) / 2, (h - crop_h) / 2
        if aug_params['rand_crop'] and aug:
            x = random.randint(0, w - crop_w)
            y = random.randint(0, h - crop_h)
        im = im[y:y + crop_h, x:x + crop_w, :]

        # cv2.imshow("name", im.astype(np.uint8))
        # cv2.waitKey()

        # Blur
        '''org = x.asnumpy()
        sig = random.random() * 5
        result = np.zeros_like(org)
        for i in xrange(3):
            result[0, i, :, :] = ndimage.gaussian_filter(org[0, i, :, :], sig)
        print sig
        import cv2
        cv2.imshow("name", (result[0].transpose(1, 2, 0)+128).astype(np.uint8))
        cv2.waitKey()
        cv2.imshow("name", (org[0].transpose(1, 2, 0)+128).astype(np.uint8))
        cv2.waitKey()'''

        # HWC -> CHW, then swap cv2's BGR channel order to RGB.
        im = np.transpose(im, (2, 0, 1))
        newim = np.zeros_like(im)
        newim[0] = im[2]
        newim[1] = im[1]
        newim[2] = im[0]

        if aug and aug_params['rand_mirror'] and random.randint(0, 1) == 1:
            newim = newim[:, :, ::-1]
        # Mean subtraction happens after the channel swap — so aug_params['mean']
        # is presumably in RGB order; verify against how it was computed.
        newim -= aug_params['mean']

        return newim

    def get_single(self, i, aug=False):
        # Load and preprocess one image by index (augmentation opt-in).
        im = cv2.imread(self.data[i]).astype(np.float32)
        im = ImgLibrary.augment(im, self.aug_params, aug)
        return im

    def next(self):
        # NOTE(review): the check is `current > data_num` (not
        # current + batch_size > data_num), so one extra batch that wraps
        # around via `i % self.data_num` is emitted at epoch end — looks
        # deliberate (keeps batches full) but confirm.
        #if self.current + self.batch_size > self.data_num:
        if self.current > self.data_num:
            raise StopIteration

        shape = self.aug_params['input_shape']
        x = np.zeros((self.batch_size, shape[0], shape[1], shape[2]))
        y = np.zeros((self.batch_size, self.label_num) if self.label_num > 1
                     else (self.batch_size, ))
        index = []
        for i in range(self.current, self.current + self.batch_size):
            # augment() is called with aug=False here, so batches are only
            # resized/center-cropped/mean-subtracted, never randomly augmented.
            im = cv2.imread(self.data[i % self.data_num]).astype(np.float32)
            im = ImgLibrary.augment(im, self.aug_params)
            x[i - self.current] = im
            y[i - self.current] = self.labels[i % self.data_num]
            index.append(i)

        #x -= self.aug_params['mean']

        x = mx.nd.array(x)
        label = mx.nd.array(y)

        batch = mx.io.DataBatch(data=[x], label=[label], pad=0, index=index)
        self.current += self.batch_size

        return batch
--------------------------------------------------------------------------------
/RL/mars_test_baseline.py:
--------------------------------------------------------------------------------
1 | import sys
2 | #sys.path.insert(0, "mxnet/python/")
3 | import find_mxnet
4 | import mxnet as mx
5 |
6 | import numpy as np
7 | import argparse
8 | import random
9 |
10 | from symbols import sym_base_net, sym_DQN
11 | from utils import get_imRecordIter, load_checkpoint
12 | from sklearn.metrics import average_precision_score
13 |
14 |
# CMC rank cutoffs evaluated (rank-1/5/10/20) and how many of them there are.
cmcs = [1, 5, 10, 20]
cn = 4
17 |
def norm_cnts(cnts, cnt):
    """Normalize the first `cn` CMC hit counters by the total trial count."""
    total = cnt[0]
    return [cnts[k] / total for k in range(cn)]
20 |
def update_cnts(cur, cnts, cnt):
    """Credit every CMC cutoff that 0-based rank `cur` falls under; bump total."""
    for k in range(cn):
        if cmcs[k] > cur:
            cnts[k] += 1.0
    cnt[0] += 1.0
26 |
def parse_args():
    """Parse command-line options for MARS baseline evaluation.

    Returns an argparse.Namespace with gpus, data_dir, sample_size,
    base-model checkpoint prefix/epoch, and the backbone network name.
    """
    p = argparse.ArgumentParser(
        description='single domain car recog training')
    p.add_argument('--gpus', type=str, default='2',
                   help='the gpus will be used, e.g "0,1"')
    p.add_argument('--data-dir', type=str,
                   default="/data3/matt/MARS",
                   help='data directory')
    p.add_argument('--sample-size', type=int, default=8,
                   help='sample frames from each video')
    p.add_argument('--base-model-load-epoch', type=int, default=1,
                   help='load the model on an epoch using the model-load-prefix')
    # Alternate prefix used in experiments: 'mars_baseline_b8'.
    p.add_argument('--base-model-load-prefix', type=str, default='mars_alex',
                   help='load model prefix')
    # Alternate backbone: 'inception-bn'.
    p.add_argument('--network', type=str,
                   default='alexnet',
                   help='network name')
    return p.parse_args()
45 |
46 |
def create_module(ctx, seq_len, is_train=False):
    # Build a DQN module fed by two pre-extracted feature streams of length
    # seq_len (1024-dim features each).
    # NOTE(review): sym_DQN in symbols.py takes (data1, data2, num_sim,
    # num_hidden, min_states, min_imgs, ...) positionally; here `args` is
    # passed as the first positional argument, which would not match that
    # signature — this helper looks stale/unused, confirm before calling.
    net = sym_DQN(args, is_train=is_train, num_acts=args.num_acts, bn=False, global_stats=False, no_his=True)
    mod = mx.mod.Module(symbol=net, data_names=('data1', 'data2'), label_names=None,
                        fixed_param_names=['data1', 'data2'], context=ctx)
    mod.bind(data_shapes=[('data1', (seq_len, 1024)), ('data2', (seq_len, 1024)),],
             for_training=is_train, inputs_need_grad=False)
    return mod
54 |
def dist(a, b):
    """Squared L2 distance between two NDArrays, returned as a host scalar."""
    delta = a - b
    return mx.nd.sum(delta * delta).asnumpy()[0]
58 |
def copyto(x):
    # Deep-copy an NDArray on its own device (detaches from shared storage).
    return x.copyto(x.context)
61 |
62 |
63 |
# ---- Script setup: load the baseline CNN and extract per-frame features ----
args = parse_args()
print args
devices = [mx.gpu(int(i)) for i in args.gpus.split(',')]

batch_size = 128
seq_len = batch_size
model_path = 'models'
# Fixed seed so the random query subsample (`pool`) is reproducible.
random.seed(19930214)
pool = set([random.randrange(1980) for _ in xrange(100)])
print pool

# Load the pretrained baseline checkpoint and bind it for inference only.
arg_params, aux_params = load_checkpoint(
    '../baseline/%s/%s' % (model_path, args.base_model_load_prefix), args.base_model_load_epoch)
base_mod = mx.mod.Module(symbol=sym_base_net(args.network, is_test=True), data_names=('data',), label_names=None, context=devices)
base_mod.bind(data_shapes=[('data', (1024, 3, 224, 112))], for_training=False)
base_mod.init_params(initializer=None, arg_params=arg_params,aux_params=aux_params,force_init=True)

# Extract L2-normalized features F for every frame in the evaluation record.
dataiter = get_imRecordIter(
    args, 'recs/eval_test', (3, 224, 112), 1024,
    shuffle=False, aug=False, even_iter=True)
dataiter.reset()
F = base_mod.predict(dataiter)
del dataiter
print 'l2'
print F

print 'base feat predicted'
# query rows: (person id, camera, gallery tracklet index);
# gallery rows: (frame range start, frame range end, person id, camera).
query = np.loadtxt('/data3/matt/MARS/MARS-evaluation/info/query.csv', delimiter=',').astype(int)
gallery = np.loadtxt('/data3/matt/MARS/MARS-evaluation/info/gallery.csv', delimiter=',').astype(int)

cnts, cnt = [0, 0, 0, 0], [0]
max_turn, tot_ava = args.sample_size, 1

# Convert the 1-based CSV indices to 0-based.
query[:, 2] -= 1
gallery[:, 0] -= 1
99 |
100 |
101 |
def get_data(a):
    """Sample one frame feature uniformly from gallery tracklet `a`."""
    lo, hi = gallery[a, 0], gallery[a, 1]
    return F[random.randrange(lo, hi)]
104 |
# Per-frame-budget accumulators: cmc1[j]/MAP[j] score using j+1 sampled frames.
max_turn, gc = 10, 0
max_frames = max_turn
cmc1 = np.zeros(max_frames)
MAP = np.zeros(max_frames)
results = []
110 |
def dist(a, b):
    # NOTE(review): identical duplicate of dist() defined earlier in this
    # file; kept byte-for-byte to preserve behavior — consider removing one.
    diff = a - b
    return mx.nd.sum(diff*diff).asnumpy()[0]
114 |
# Precompute avgs[k][a] = mean feature of k+1 randomly sampled frames from
# gallery tracklet a (running sums first, divided by (k+1) afterwards).
cur = mx.nd.zeros((gallery.shape[0], F.shape[1]),ctx=devices[0])
avgs = mx.nd.zeros((max_frames, gallery.shape[0], F.shape[1]),ctx=devices[0])
for k in xrange(max_frames):
    for a in xrange(gallery.shape[0]):
        j, camj = gallery[a, 2:]
        # Skip junk tracklets (id == -1) and empty frame ranges.
        if j == -1 or gallery[a, 0] == gallery[a, 1]:
            continue
        cur[a] = get_data(a)
        if k == 0:
            avgs[k] = copyto(cur)
        else:
            avgs[k] = (avgs[k - 1] + cur)
for k in xrange(1, max_frames):
    avgs[k] /= (k + 1)
129 |
130 |
cnts_g, cnt_g = [0, 0, 0, 0], [0]

# For each query: distance of its averaged feature to every gallery average
# at every frame budget, then accumulate rank-1 (cmc1) and AP (MAP) per budget.
for q in xrange(query.shape[0]):
    i, cam, idx = query[q]
    if gallery[idx, 0] == gallery[idx, 1]:
        continue
    scores = []
    labels = []
    d = []
    # d[k][a] = squared distance between query tracklet idx and gallery
    # tracklet a when k+1 frames are averaged.
    for k in xrange(max_frames):
        g = avgs[k]
        diff = g[idx] - g
        d.append(mx.nd.sum(diff*diff, axis=1).asnumpy())
    for a in xrange(gallery.shape[0]):
        j, camj = gallery[a, 2:]
        # Standard ReID protocol: drop same-id/same-camera pairs, junk ids,
        # and empty tracklets.
        if j == i and camj == cam or j == -1 or gallery[a, 0] == gallery[a, 1]:
            continue
        g = []
        for k in xrange(max_frames):
            g.append(d[k][a])
        scores.append(g)
        labels.append(1 if i == j else 0)

    scores = np.array(scores)
    for j in xrange(max_frames):
        # Negate distances: average_precision_score expects higher == better.
        MAP[j] += average_precision_score(labels, -scores[:, j])
        a = np.argmin(scores[:, j])
        if labels[a] == 1:
            cmc1[j] += 1
    gc += 1
    print q, i, cam, idx, gc
print MAP[:10] / gc
print cmc1[:10] / gc
164 |
165 | '''
166 | avgs = []
167 | for a in xrange(gallery.shape[0]):
168 | j, camj = gallery[a, 2:]
169 | if j == -1 or gallery[a, 0] == gallery[a, 1]:
170 | avgs.append(None)
171 | continue
172 | for k in xrange(gallery[a, 0], gallery[a, 1]):
173 | if k == gallery[a, 0]:
174 | avg_opp = copyto(F[k])
175 | else:
176 | avg_opp += F[k]
177 | avg_opp /= gallery[a, 1] - gallery[a, 0]
178 | avgs.append(avg_opp)
179 |
180 | for q in xrange(query.shape[0]):
181 | if not q in pool:
182 | continue
183 | i, cam, idx = query[q]
184 | if gallery[idx, 0] == gallery[idx, 1]:
185 | continue
186 | scores = []
187 | label = []
188 | d = []
189 | for k in xrange(gallery[idx, 0], gallery[idx, 1]):
190 | if k == gallery[idx, 0]:
191 | avg_cur = copyto(F[k])
192 | else:
193 | avg_cur += F[k]
194 | avg_cur /= gallery[idx, 1] - gallery[idx, 0]
195 | for a in xrange(gallery.shape[0]):
196 | j, camj = gallery[a, 2:]
197 | if j == i and camj == cam or j == -1 or gallery[a, 0] == gallery[a, 1]:
198 | continue
199 | d.append((dist(avg_cur, avgs[a]), j))
200 |
201 | d = sorted(d)
202 | cur = 0
203 | for a in xrange(len(d)):
204 | j = d[a][1]
205 | if j == i:
206 | break
207 | else:
208 | cur += 1
209 | update_cnts(cur, cnts, cnt)
210 | print q, i, cam, idx, cur, norm_cnts(cnts, cnt)
211 | '''
212 |
--------------------------------------------------------------------------------
/RL/symbols.py:
--------------------------------------------------------------------------------
1 | import find_mxnet
2 | import mxnet as mx
3 | import importlib
4 |
5 | from rnn_models import get_gru_cell, get_lstm_cell, lstm, gru
6 |
7 | BN_EPS = 1e-5+1e-10
8 |
9 |
def sym_base_net(network, fix_gamma=False, is_train=False, global_stats=False, is_test=False):
    '''
    Build the shared CNN trunk used for feature extraction.

    Imports symbol_<network>.py and appends global average pooling + flatten.
    Test mode (is_test=True): single 'data' input; returns L2-normalized
    features. Train mode: 'data1' and 'data2' are concatenated along the
    batch axis, run through the trunk once, then split back into two streams.

    Note: `is_train` is accepted for interface compatibility but is not used
    in this function's body.
    '''
    if is_test:
        data = mx.symbol.Variable(name="data")
    else:
        data1 = mx.symbol.Variable(name="data1")
        data2 = mx.symbol.Variable(name="data2")
        data = mx.sym.Concat(*[data1, data2], dim=0, name='data')
    symbol = importlib.import_module('symbol_' + network).get_symbol(data, fix_gamma=fix_gamma, global_stats=global_stats)
    pooling = mx.symbol.Pooling(
        data=symbol, kernel=(1, 1), global_pool=True,
        pool_type='avg', name='global_pool')
    flatten = mx.symbol.Flatten(data=pooling, name='flatten')
    if is_test:
        # Unit-length features so squared L2 distance == 2 - 2*cosine.
        return mx.sym.L2Normalization(flatten)
    # Split the concatenated batch back into the two input streams.
    # (The original trailing `return None` was unreachable and has been removed.)
    split_flatten = mx.sym.SliceChannel(flatten, num_outputs=2, axis=0)
    return split_flatten[0], split_flatten[1]
32 |
33 |
def fusion_layer(data, num_hidden, num_layers, name, l2=False, weights=None, bias=None):
    '''
    Stack `num_layers` fully-connected layers over `data` (ReLU between
    layers, none after the last when num_layers > 1), then concatenate a
    skip branch with the output along dim 1.

    Parameters
    ----------
    data : Symbol          input features
    num_hidden : int       width of every FC layer
    num_layers : int       number of FC layers (callers here use 2)
    name : str             prefix for generated layer names
    l2 : bool              if True, concat L2-normalized first-layer output
                           and final output instead of raw input and output
    weights, bias : list   shared weight/bias Variables, one per layer;
                           required in practice (indexed per layer below)
    '''
    # Defaults changed from mutable `[]` to None (shared-mutable-default
    # pitfall); an empty list would IndexError at weights[0] either way, so
    # callers must still supply one Variable per layer.
    weights = [] if weights is None else weights
    bias = [] if bias is None else bias
    org_data = data
    for i in xrange(num_layers):
        data = mx.sym.FullyConnected(data=data, num_hidden=num_hidden, name='%s%d'%(name,i), weight=weights[i], bias=bias[i])
        if i == 0:
            # Remember the first layer's pre-activation output for the l2 branch.
            first_layer = data
        elif i == num_layers - 1:
            # Skip the ReLU on the final layer (only when num_layers > 1).
            continue
        data = mx.sym.Activation(data=data, act_type='relu', name='%srelu%d'%(name,i))
    if l2:
        return mx.sym.Concat(*[mx.sym.L2Normalization(first_layer), mx.sym.L2Normalization(data)], dim=1)
    return mx.sym.Concat(*[org_data, data], dim=1)
46 |
47 |
def get_dist_sym(a, b):
    # Symbolic per-row squared L2 distance; keepdims=1 keeps shape (batch, 1).
    diff = a - b
    return mx.sym.sum(diff*diff, axis=1, keepdims=1)
51 |
52 |
def sym_DQN(data1, data2, num_sim, num_hidden, min_states, min_imgs, num_acts=4, fusion=False, is_train=False, bn=False, l2_norm=False, global_stats=False, no_his=True, debug=False, maxout=False, cls=False):
    '''
    Build the Q-value symbol over two feature sequences.

    data1/data2 are feature batches sliced into `min_states` time steps; for
    each step i a shared 3-layer MLP maps an aggregated pairwise-similarity
    representation to `num_acts` Q-values. Returns a Group of
    [Qvalues (steps concatenated on dim 0), atts (softmaxed Q per step)].

    NOTE(review): many keyword parameters (num_sim, min_imgs, is_train, bn,
    l2_norm, global_stats, no_his, debug, maxout, cls) are not used in this
    body — presumably kept for interface compatibility with callers.
    '''
    #data1 = mx.sym.Dropout(data1)
    #data2 = mx.sym.Dropout(data2)

    # Per-step L2-normalized feature slices for both sequences.
    featmaps = [mx.sym.SliceChannel(mx.sym.L2Normalization(data1), num_outputs=min_states, axis=0),
                mx.sym.SliceChannel(mx.sym.L2Normalization(data2), num_outputs=min_states, axis=0)]
    gs = featmaps
    # ds[i]/ts[i]: squared distances / inner products between step i of one
    # sequence and steps 0..i of the other (both directions for j < i).
    ds, ts = [], []
    for i in xrange(min_states):
        d, t = [], []
        d.append(get_dist_sym(featmaps[0][i], featmaps[1][i]))
        t.append(mx.sym.sum(featmaps[0][i] * featmaps[1][i], axis=1, keepdims=1))
        for j in xrange(i):
            d.append(get_dist_sym(featmaps[0][i], featmaps[1][j]))
            d.append(get_dist_sym(featmaps[0][j], featmaps[1][i]))
            t.append(mx.sym.sum(featmaps[0][i] * featmaps[1][j], axis=1, keepdims=1))
            t.append(mx.sym.sum(featmaps[0][j] * featmaps[1][i], axis=1, keepdims=1))
        ds.append(d)
        ts.append(t)
        print i, len(d)


    # Absolute difference of normalized features, sliced per step.
    featmap = mx.sym.abs(mx.sym.L2Normalization(data1) - mx.sym.L2Normalization(data2))
    featmaps = mx.sym.SliceChannel(featmap, num_outputs=min_states, axis=0, name='featmaps')
    # Shared MLP parameters, reused at every time step.
    W1, W2, W3 = mx.symbol.Variable(name="fc1_weight"), mx.symbol.Variable(name="fc2_weight"), mx.symbol.Variable(name="Qv_weight")
    b1, b2, b3 = mx.symbol.Variable(name="fc1_bias"), mx.symbol.Variable(name="fc2_bias"), mx.symbol.Variable(name="Qv_bias")
    if fusion:
        # Shared 2-layer fusion weights for the three fused branches.
        Wfus = [[mx.symbol.Variable(name="fus%d-%d_weight"%(i,j)) for j in xrange(2)] for i in xrange(3)]
        bfus = [[mx.symbol.Variable(name="fus%d-%d_bias"%(i,j)) for j in xrange(2)] for i in xrange(3)]
    ############# mini batch ###################
    ret, unsures, atts = [], [], []
    if True:
        for i in xrange(min_states):
            # Running min/max/sum of all pairwise similarities and distances
            # accumulated up to step i.
            if i == 0:
                tmin, tmax, tsum = ts[0][0], ts[0][0], ts[0][0]
                dmin, dmax, dsum = ds[0][0], ds[0][0], ds[0][0]
            else:
                for j in xrange(len(ts[i])):
                    tmin = mx.sym.minimum(tmin, ts[i][j])
                    tmax = mx.sym.maximum(tmax, ts[i][j])
                    tsum = tsum + ts[i][j]
                for j in xrange(len(ds[i])):
                    dmin = mx.sym.minimum(dmin, ds[i][j])
                    dmax = mx.sym.maximum(dmax, ds[i][j])
                    dsum = dsum + ds[i][j]
            if i <= 1:
                agg = featmaps[0]
                feat = mx.sym.Concat(*[tmin, tmax, tsum / ((i + 1) * (i + 1)), dmin, dmax, dsum / ((i + 1) * (i + 1)),], dim=1)
            else:
                print i, len(unsures), len(ret)
                # Aggregate earlier steps weighted by their "unsure" softmax
                # mass (1 - P(unsure) from each previous step's Q softmax).
                g1 = mx.sym.broadcast_mul(gs[0][0], unsures[0])
                g2 = mx.sym.broadcast_mul(gs[1][0], unsures[0])
                wsum = unsures[0]
                for j in xrange(1, i):
                    g1 = g1 + mx.sym.broadcast_mul(gs[0][j], unsures[j])
                    g2 = g2 + mx.sym.broadcast_mul(gs[1][j], unsures[j])
                    wsum = wsum + unsures[j]
                g1 = mx.sym.broadcast_div(g1, wsum)
                g2 = mx.sym.broadcast_div(g2, wsum)
                agg = mx.sym.abs(g1 - g2)
                feat = mx.sym.Concat(*[tmin, tmax, tsum / ((i + 1) * (i + 1)), dmin, dmax, dsum / ((i + 1) * (i + 1)),], dim=1)

            fm = featmaps[i]
            if fusion:
                print 'mini batch fusion on'
                agg = fusion_layer(agg, num_hidden, 2, 'fus_featmap1', weights=[Wfus[0][0], Wfus[0][1]], bias=[bfus[0][0], bfus[0][1]])#, l2=True)
                fm = fusion_layer(fm, num_hidden, 2, 'fus_featmap2', weights=[Wfus[1][0], Wfus[1][1]], bias=[bfus[1][0], bfus[1][1]])#, l2=True)
                feat = fusion_layer(feat, 32, 2, 'fus_feat', weights=[Wfus[2][0], Wfus[2][1]], bias=[bfus[2][0], bfus[2][1]])
            diff = mx.sym.Concat(*[agg, fm, feat], dim=1, name='diff%d'%i)
            #diff = mx.sym.Concat(*[agg, fm], dim=1, name='diff%d'%i)
            # Shared 2-hidden-layer MLP head -> Q-values for this step.
            sir_fc1 = mx.sym.FullyConnected(data=diff, num_hidden=num_hidden,name='sirfc1-%d'%i, weight=W1, bias=b1)
            sir_relu1 = mx.sym.Activation(data=sir_fc1, act_type='relu', name='sirrl1-%d'%i)
            sir_fc2 = mx.sym.FullyConnected(data=sir_relu1, num_hidden=num_hidden,name='sirfc2-%d'%i, weight=W2, bias=b2)
            sir_relu2 = mx.sym.Activation(data=sir_fc2, act_type='relu', name='sirrl2-%d'%i)
            Q = mx.sym.FullyConnected(data=sir_relu2, num_hidden=num_acts,name='Qvalue-%d'%i, weight=W3, bias=b3)
            # Softmaxed Q used only as (gradient-blocked) attention weights.
            qsm = mx.sym.SoftmaxActivation(Q)
            qsm = mx.sym.BlockGrad(qsm)
            ret.append(Q)
            atts.append(qsm)
            if i + 1 < min_states:
                # unsure weight for this step = 1 - P(action 2); assumes the
                # softmax has 3 channels (num_acts == 3 in this configuration).
                Q_sliced = mx.sym.SliceChannel(qsm, num_outputs=3, axis=1)
                unsures.append(1.0 - Q_sliced[2])

    Qvalue = mx.sym.Concat(*ret, dim=0, name='Qvalues')
    attss = mx.sym.Concat(*atts, dim=0, name='atts')
    return mx.sym.Group([Qvalue, attss])
139 |
140 |
if __name__ == '__main__':
    # Smoke test: build the DQN symbol on dummy inputs and print its summary.
    # BUG FIX: the previous call `sym_DQN(128, 128, num_acts=3, ...)` passed
    # ints where Symbols (data1, data2) are expected and omitted the required
    # positional arguments num_sim/num_hidden, raising a TypeError.
    data1 = mx.symbol.Variable(name='data1')
    data2 = mx.symbol.Variable(name='data2')
    sym = sym_DQN(data1, data2, num_sim=128, num_hidden=128, num_acts=3,
                  min_states=2, min_imgs=4, fusion=True)
    mx.viz.print_summary(sym, {'data1': (2, 1024), 'data2': (2, 1024)})
146 |
--------------------------------------------------------------------------------
/baseline/calc_cmc.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import math
3 | import argparse
4 | import munkres
5 | import hungarian
6 | import random
7 | import sys
8 |
9 |
# CMC rank cutoffs to report (rank-1/5/10/20) and how many there are.
cmcs = [1, 5, 10, 20]
cn = 4
# Monte-Carlo parameters for the sampled pairwise-distance estimators below.
single_sample_times, sample_rounds = 100, 10
# Number of frames drawn per side for the bipartite-matching estimator.
match_points = 100
14 |
def norm_cnts(cnts, cnt):
    """Return the first `cn` CMC counters divided by the total count."""
    denom = cnt[0]
    return [cnts[j] / denom for j in range(cn)]
17 |
def update_cnts(d, cnts, cnt, N, i):
    # Rank of the true match: position of gallery index `i` in the ascending
    # sort of distance vector `d` (length N).
    r = np.argsort(d)
    cur = -1
    for j in xrange(N):
        if r[j] == i:
            cur = j
            break
    # Credit every CMC cutoff the 0-based rank falls under; bump the total.
    for j in xrange(cn):
        if cur < cmcs[j]:
            cnts[j] += 1.0
    cnt[0] += 1.0
    print cur, norm_cnts(cnts, cnt)
30 |
def pooling_method(f, N):
    # Baseline: average-pool each person's frame features per camera, apply a
    # per-dimension joint normalization, then score CMC both directions
    # (cam0 -> cam1 and cam1 -> cam0) with squared L2 distance.
    cam0 = []
    for i in xrange(N):
        p = np.zeros(1024)
        #p = np.full(1024, -1e100)   # (max-pool variant, disabled)
        for a in xrange(cnt_lst[i], cnt_lst[i + 1]):
            p += f[a]
            #p = np.maximum(p, f[a])
        p /= (cnt_lst[i + 1] - cnt_lst[i])
        cam0.append(p)
    cam1 = []
    for i in xrange(N):
        p = np.zeros(1024)
        #p = np.full(1024, -1e100)
        for a in xrange(cnt_lst[i + N], cnt_lst[i + N + 1]):
            p += f[a]
            #p = np.maximum(p, f[a])
        p /= (cnt_lst[i + N + 1] - cnt_lst[i + N])
        cam1.append(p)

    # Normalize each feature dimension by its L2 norm over both cameras.
    cam0, cam1 = np.array(cam0), np.array(cam1)
    for i in xrange(1024):
        norm = (cam0[:, i] * cam0[:, i]).sum() + (cam1[:, i] * cam1[:, i]).sum()
        norm = math.sqrt(norm)
        cam0[:, i] /= norm
        cam1[:, i] /= norm

    cnts, cnt = [0, 0, 0, 0], [0]
    for i in xrange(N):
        d = np.zeros(N)
        for j in xrange(N):
            t = (cam0[i] - cam1[j])
            d[j] += (t * t).sum()
        update_cnts(d, cnts, cnt, N, i)

    for i in xrange(N):
        d = np.zeros(N)
        for j in xrange(N):
            t = (cam1[i] - cam0[j])
            d[j] += (t * t).sum()
        update_cnts(d, cnts, cnt, N, i)

    print 'pooling method', norm_cnts(cnts, cnt)
74 |
def calc_mean(d):
    """Estimate each pair's mean frame-to-frame distance by random sampling.

    For every distance matrix in `d`, draws `single_sample_times` random
    entries per round and accumulates the per-round sample mean over
    `sample_rounds` rounds. Returns one accumulated score per matrix.
    """
    scores = np.zeros(len(d))
    for _ in range(sample_rounds):
        for idx, mat in enumerate(d):
            acc = 0.0
            for _s in range(single_sample_times):
                r = random.randint(0, mat.shape[0] - 1)
                c = random.randint(0, mat.shape[1] - 1)
                acc += mat[r][c]
            scores[idx] += acc / single_sample_times
    return scores
87 |
def calc_median(d):
    """Estimate a median-like distance score per pair by random sampling.

    Accumulates, per round, the sum of the two samples just above the middle
    of the sorted draw (not halved — a monotone proxy for the median).
    """
    scores = np.zeros(len(d))
    mid = single_sample_times // 2
    for _ in range(sample_rounds):
        for idx, mat in enumerate(d):
            samples = []
            for _s in range(single_sample_times):
                r = random.randint(0, mat.shape[0] - 1)
                c = random.randint(0, mat.shape[1] - 1)
                samples.append(mat[r][c])
            samples.sort()
            scores[idx] += samples[mid] + samples[mid + 1]
    return scores
100 |
def calc_min(d):
    """Estimate each pair's minimum frame-to-frame distance by random sampling.

    Per round, keeps the smallest of `single_sample_times` random entries and
    accumulates it over `sample_rounds` rounds.
    """
    scores = np.zeros(len(d))
    for _ in range(sample_rounds):
        for idx, mat in enumerate(d):
            best = 1e100
            for _s in range(single_sample_times):
                r = random.randint(0, mat.shape[0] - 1)
                c = random.randint(0, mat.shape[1] - 1)
                best = min(best, mat[r][c])
            scores[idx] += best
    return scores
112 |
def calc_match(d):
    """Score each pair via minimum-cost bipartite matching of sampled frames.

    For every distance matrix in `d`, samples `match_points` rows and columns,
    solves the assignment problem on the sampled submatrix (hungarian.lap),
    and accumulates the total matched cost over `sample_rounds` rounds.
    """
    ret = np.zeros(len(d))
    for _ in range(sample_rounds):
        for i in range(len(d)):
            choices_a = [random.randint(0, d[i].shape[0] - 1) for _ in range(match_points)]
            choices_b = [random.randint(0, d[i].shape[1] - 1) for _ in range(match_points)]
            mat = d[i][choices_a]
            mat = (mat.T)[choices_b]
            am = np.array(mat)
            # match[p] = column assigned to row p by the Hungarian solver.
            match = hungarian.lap(am)[0]
            x = 0.0
            for p in range(len(match)):
                # BUG FIX: previously summed mat[i][match[i]] — the outer pair
                # index, adding the same element len(match) times. Sum the
                # cost of each matched (row p, column match[p]) pair instead.
                x += mat[p][match[p]]
            ret[i] += x
    return ret
136 |
def calc_order(d, rerank=False):
    # Score each pair by the t smallest matched costs of a full Hungarian
    # matching on the truncated square submatrix, where t is the smallest
    # side length across all matrices (so scores are comparable).
    # NOTE(review): the `rerank` branch is an unimplemented stub (pass).
    ret = np.zeros(len(d))
    t = 10000
    for i in xrange(len(d)):
        t = min(d[i].shape[0], t)
        t = min(t, d[i].shape[1])
    for i in xrange(len(d)):
        if rerank:
            pass
        else:
            tp = min(d[i].shape[0], d[i].shape[1])
            choices_a = xrange(tp)
            choices_b = xrange(tp)
            mat = d[i][choices_a]
            mat = (mat.T)[choices_b]
            am = np.array(mat)
            M = munkres.Munkres()
            #print mat.shape
            match = M.compute(am)
            match = sorted(match)
            g = [mat[match[0][0]][match[0][1]]]
            for p in xrange(1, len(match)):
                g.append(mat[match[p][0]][match[p][1]])
                #for q in xrange(p - 1):
                #if match[p][1] > match[q][1]:
                #    ret[i] += 1
            # Sum only the t cheapest matched costs.
            g = sorted(g)
            for p in xrange(t):
                ret[i] += g[p]
            print len(match), ret[i]
    #print ret
    return ret
169 |
def other_method(f, N):
    # Compare the sampled distance estimators (mean / median / min / match)
    # on full pairwise frame-distance matrices, both directions:
    # cam1 queries vs cam0 gallery, then cam0 queries vs cam1 gallery.
    cnts_median, cnt_median = [0, 0, 0, 0], [0]
    cnts_mean, cnt_mean = [0, 0, 0, 0], [0]
    cnts_min, cnt_min = [0, 0, 0, 0], [0]
    cnts_match, cnt_match = [0, 0, 0, 0], [0]
    cnts_order, cnt_order = [0, 0, 0, 0], [0]
    for i in xrange(N):
        # d[j] = (nb x na) squared-distance matrix between person j's cam0
        # frames and person i's cam1 frames.
        d, na = [], cnt_lst[N + i + 1] - cnt_lst[N + i]
        for j in xrange(N):
            nb = cnt_lst[j + 1] - cnt_lst[j]
            t = np.zeros((nb, na))
            for b in xrange(cnt_lst[j], cnt_lst[j + 1]):
                for a in xrange(cnt_lst[N + i], cnt_lst[N + i + 1]):
                    g = f[a] - f[b]
                    t[b - cnt_lst[j], a - cnt_lst[N + i]] = (g * g).sum()
            d.append(t)
        print 'cam0', i
        update_cnts(calc_mean(d), cnts_mean, cnt_mean, N, i)
        update_cnts(calc_median(d), cnts_median, cnt_median, N, i)
        update_cnts(calc_min(d), cnts_min, cnt_min, N, i)
        update_cnts(calc_match(d), cnts_match, cnt_match, N, i)
        #update_cnts(calc_order(d), cnts_order, cnt_order, N, i)
        sys.stdout.flush()
    for i in xrange(N):
        # Reverse direction: cam0 queries against cam1 gallery.
        d, na = [], cnt_lst[i + 1] - cnt_lst[i]
        for j in xrange(N):
            nb = cnt_lst[N + j + 1] - cnt_lst[N + j]
            t = np.zeros((nb, na))
            for b in xrange(cnt_lst[N + j], cnt_lst[N + j + 1]):
                for a in xrange(cnt_lst[i], cnt_lst[i + 1]):
                    g = f[a] - f[b]
                    t[b - cnt_lst[N + j], a - cnt_lst[i]] = (g * g).sum()
            d.append(t)
        print 'cam1', i
        update_cnts(calc_mean(d), cnts_mean, cnt_mean, N, i)
        update_cnts(calc_median(d), cnts_median, cnt_median, N, i)
        update_cnts(calc_min(d), cnts_min, cnt_min, N, i)
        update_cnts(calc_match(d), cnts_match, cnt_match, N, i)
        #update_cnts(calc_order(d), cnts_order, cnt_order, N, i)
        sys.stdout.flush()
    print 'min', norm_cnts(cnts_min, cnt_min)
    print 'mean', norm_cnts(cnts_mean, cnt_mean)
    print 'median', norm_cnts(cnts_median, cnt_median)
    print 'match', norm_cnts(cnts_match, cnt_match)
    sys.stdout.flush()
215 |
def parse_args():
    """Parse CLI options: feature CSV path and sequence-boundary list path."""
    p = argparse.ArgumentParser(
        description='Calc CMC Rank for ilds&prid dataset')
    p.add_argument('--data', type=str, help='data path',
                   default='features/image_test-prid_baseline_b4.csv')
    # For iLIDS-VID use '/data3/matt/iLIDS-VID/recs/image_test.txt' instead.
    p.add_argument('--list', type=str, help='list path',
                   default='/data3/matt/prid_2011/recs/image_test.txt')
    return p.parse_args()
227 |
args = parse_args()
print args

# f: per-frame feature rows; cnt_lst: cumulative frame-range boundaries,
# first N entries for camera 0, next N for camera 1.
f, cnt_lst = np.loadtxt(args.data), np.loadtxt(args.list).astype(int)
N = cnt_lst.shape[0] / 2
# Convert each person's frames into running averages in place:
# after this, f[a] = mean of frames cnt_lst[i]..a (a - cnt_lst[i] + 1 frames).
for i in xrange(N):
    for a in xrange(cnt_lst[i] + 1, cnt_lst[i + 1]):
        f[a] += f[a - 1]
    for a in xrange(cnt_lst[i] + 1, cnt_lst[i + 1]):
        f[a] /= a - cnt_lst[i] + 1
for i in xrange(N):
    for a in xrange(cnt_lst[N + i] + 1, cnt_lst[N + i + 1]):
        f[a] += f[a - 1]
    for a in xrange(cnt_lst[N + i] + 1, cnt_lst[N + i + 1]):
        f[a] /= a - cnt_lst[N + i] + 1

pooling_method(f, N)
other_method(f, N)
246 |
247 |
--------------------------------------------------------------------------------
/RL/find_eg.py:
--------------------------------------------------------------------------------
1 | import sys
2 | #sys.path.insert(0, "mxnet/python/")
3 | import find_mxnet
4 | import mxnet as mx
5 |
6 | import numpy as np
7 | import argparse
8 | from sklearn.metrics import average_precision_score
9 |
10 | from batch_provider import BatchProvider
11 | from utils import get_imRecordIter, load_checkpoint
12 | from agent import sym_base_net, wash, get_Qvalue, create_moduleQ
13 |
14 | import cv2, os
15 |
16 | import matplotlib as mpl
17 | import matplotlib.pyplot as plt
18 |
19 | mpl.rcParams['lines.linewidth'] = 1.5
20 | mpl.rcParams['savefig.dpi'] = 300
21 | mpl.rcParams['ps.useafm'] = True
22 | mpl.rcParams['pdf.use14corefonts'] = True
23 | mpl.rcParams['text.usetex'] = True
24 | mpl.rcParams['font.size'] = 24
25 | mpl.rcParams['font.family'] = "Times New Roman"
26 | mpl.rcParams['legend.fontsize'] = "small"
27 | mpl.rcParams['legend.fancybox'] = True
28 | mpl.rcParams['lines.markersize'] = 10
29 | mpl.rcParams['figure.figsize'] = 9, 6.3
30 | mpl.rcParams['legend.labelspacing'] = 0.1
31 | mpl.rcParams['legend.borderpad'] = 0.1
32 | mpl.rcParams['legend.borderaxespad'] = 0.2
33 | mpl.rcParams['font.monospace'] = "Courier 10 Pitch"
34 | mpl.rcParams['text.latex.preamble'] = [r'\boldmath']
35 |
36 |
def plot(Q, terminal, name):
    """Plot per-step Q-values for the three actions and save as <name>.pdf.

    Q is indexable as Q[step, action]; columns 1/0/2 are the 'same' /
    'different' / 'unsure' actions. `terminal` is the number of steps shown.
    """
    steps = [s + 1 for s in range(terminal)]
    # (Q column, marker, marker face color, line color, legend label)
    series = [
        (1, 'o', (0, 1, 0, 0.5), 'g', 'same'),
        (0, 'x', (1, 0, 0, 0.5), 'r', 'different'),
        (2, '^', (1, 1, 0, 0.5), 'y', 'unsure'),
    ]

    plt.figure(figsize=(10, 9))
    ax = plt.gca()

    for col, mk, mfc, clr, lbl in series:
        plt.plot(steps, [Q[s, col] for s in range(terminal)],
                 marker=mk,
                 markersize=12,
                 markerfacecolor=mfc,
                 color=clr,
                 label=lbl,
                 alpha=0.5,
                 )

    plt.ylabel(r'\textbf{Q-Value}')
    plt.xlabel(r'\textbf{\#. Time Steps}')
    plt.grid(linestyle=':')
    plt.savefig('%s.pdf'%name)
77 |
78 |
def parse_args():
    """Parse CLI options for the qualitative-example finder (test-time args)."""
    p = argparse.ArgumentParser(
        description='single domain car recog training')
    p.add_argument('--gpus', type=str, default='2',
                   help='the gpus will be used, e.g "0,1"')
    p.add_argument('--model-load-epoch', type=int, default=3,
                   help='load the model on an epoch using the model-load-prefix')
    p.add_argument('--model-load-prefix', type=str, default='ilds-TEST-DQN_test-1-2017.11.14-23.56.43-bs4-ss8-incp_prep__nobg_noregQv_block2_f2_nofus0-2_poscontra_fne0.1-1-1_tisr1-sgd_t500-_qg0.9-up0.2-vtd4.0-_lr1e1-_32-1024-_na3-3',
                   help='load model prefix')
    p.add_argument('--batch-size', type=int, default=1,
                   help='the batch size')
    p.add_argument('--boost-times', type=int, default=1,
                   help='boosting times to increase robustness')
    return p.parse_args()
93 |
94 |
def get_train_args(name):
    # Recover the training-time argparse.Namespace by exec-ing the repr that
    # was printed as the first line of the training log (the first 10 chars
    # of that line are a prefix stripped here, leaving "Namespace(...)").
    # SECURITY NOTE(review): exec on file contents — only acceptable for
    # trusted, locally produced log files.
    fn = open('log/%s.log'%name)
    s = fn.readline()[10:]
    fn.close()
    s = 'ret=argparse.' + s
    exec(s)
    return ret
102 |
# ---- Script setup: restore the trained DQN and build data providers ----
test_args = parse_args()
print 'test arg:', test_args
devices = [mx.gpu(int(i)) for i in test_args.gpus.split(',')]
# Training hyper-parameters are recovered from the training log, so
# evaluation matches the exact training configuration.
args = get_train_args(test_args.model_load_prefix)
print 'train arg:', args

batch_size = args.batch_size
num_epoch = args.num_epoches

arg_params, aux_params = load_checkpoint('models/%s' % test_args.model_load_prefix, test_args.model_load_epoch)
data1, data2 = sym_base_net(args.network, is_train=args.e2e, global_stats=True)
Q = create_moduleQ(data1, data2, devices, args.sample_size, args.num_sim, args.num_hidden, args.num_acts, args.min_states, args.min_imgs, fusion=args.fusion, is_train=True, nh=not args.history, is_e2e=args.e2e, bn=args.q_bn)
Q.init_params(initializer=None,
              arg_params=arg_params,
              aux_params=aux_params,
              allow_missing=False,
              force_init=True)


# Record iterators over validation and training image lists (batch size 1).
valid_iter = get_imRecordIter(
    args, 'recs/%s'%args.valid_set, (3, 224, 112), 1,
    shuffle=False, aug=False, even_iter=True)
train_iter = get_imRecordIter(
    args, 'recs/%s'%args.train_set, (3, 224, 112), 1,
    shuffle=False, aug=True, even_iter=True)

valid_lst = np.loadtxt('%s/recs/%s.txt'%(args.data_dir, args.valid_set)).astype(int)
train_lst = np.loadtxt('%s/recs/%s.txt'%(args.data_dir, args.train_set)).astype(int)

valid = BatchProvider(valid_iter, valid_lst, False, args.sample_size, sample_ratio=0.5, is_valid=True, need_feat=args.history)
train = BatchProvider(train_iter, train_lst, True, args.sample_size, sample_ratio=0.5, need_feat=args.history)
N = args.num_id

# CMC accumulators (rank-1/5/10/20), AP list, and per-episode scores/turns.
cmcs, ap, cmcn, vscores, vturns = [[], [], [], []], [], [1, 5, 10, 20], [], []
max_penalty=1
138 |
139 |
def tocv2(im):
    # Convert a CHW RGB float image back to an HWC uint8 BGR image for cv2.
    # The +128 undoes a mean shift — presumably matching the preprocessing
    # mean subtraction; verify against the augmentation pipeline.
    newim = np.zeros_like(im)
    newim[0] = im[2]
    newim[1] = im[1]
    newim[2] = im[0]
    newim = np.transpose(newim, (1, 2, 0)) + 128
    newim = newim.astype(np.uint8)
    print newim
    return newim
149 |
150 | valid.reset()
151 | batch, valid_cnt, vv, vpool = 0, 0, np.zeros((N*2, N)), set()
152 | vs, vt = [0 for i in xrange(N+N)], [0 for i in xrange(N+N)]
153 | fts = [[0 for _2 in xrange(N)] for _1 in xrange(N)]
154 | fdir = 'plots/%s'%args.mode
155 | os.system('mkdir %s'%fdir)
156 | for i in xrange(args.sample_size + 1):
157 | os.system('mkdir %s/%d'%(fdir,i))
158 | for j in xrange(4):
159 | os.system('mkdir %s/%d/%d'%(fdir,i,j))
160 |
# Run episodes until every (probe, gallery) pair has received a terminal
# decision. Per turn the agent picks an action from the Q-values:
# cls 0/1 = terminal same/different verdict, cls >= 2 = ask for more frames
# (non-terminal, small penalty, advances by `step` frames).
while valid_cnt < N*N:
    batch += 1
    cur, a, b = valid.provide()
    # ground truth: do the two sequences share an identity (ids taken mod N)?
    y = ((a %N) == (b % N))
    data_batch = wash(cur, devices[0])
    Qvalue = get_Qvalue(Q, data_batch, is_train=False)
    print Q.get_outputs()[1].asnumpy()
    print Qvalue
    i = 0
    while i < args.sample_size:
        # total_forward mode: always request more frames until the last turn
        if args.total_forward:
            if i + 1 < args.sample_size:
                k = 2
            else:
                k = np.argmax(Qvalue[i, :2])
        else:
            k = np.argmax(Qvalue[i])
        cls = k % args.acts_per_round
        step = k - 1
        if cls >= 2:
            # "see more" action: penalize, terminate if it would run past
            # the available frames
            if i + step >= args.sample_size:
                r = -max_penalty
                terminal = True
            else:
                r = -args.penalty * (2.0 - (0.5 ** (step - 1)))
                terminal = False
        else:
            # terminal verdict: +1 when the decision matches the label
            r = 1 if cls == y else -max_penalty
            terminal = True
        # re-weight positive pairs (they are rarer than negatives)
        if args.pos_weight > 1:
            if y:
                r *= args.pos_weight
        else:
            if not y:
                r /= args.pos_weight
        print 'valid', i, (a, b), Qvalue[i], k, (y, cls), r
        va, vb = a, b % N
        # only the first episode for a pair contributes to the reward sums
        if (va, vb) not in vpool:
            vs[va] += r
            vs[vb+N] += r
        if terminal:
            if (va, vb) not in vpool:
                fts[va][vb] = (k + (3 if va == vb else 0), i)
                vpool.add((va, vb))
                valid_cnt += 1
            # similarity score: margin between "same" and "different" Q-values
            vv[va][vb] = Qvalue[i][0] - Qvalue[i][1]
            vt[va] += i + 1
            vv[vb+N][va] += vv[va][vb]
            vt[vb+N] += i + 1
            print va, vb, vv[va][vb], r
        # dump a qualitative plot for multi-turn correct decisions
        if terminal and r == 1 and i > 0:
            img = np.zeros((3, cur[0][0].shape[1] * 2, cur[0][0].shape[2] * args.sample_size))
            print va, vb, i
            for j in xrange(args.sample_size):
                img[:, :cur[0][j].shape[1], cur[0][j].shape[2]*j:cur[0][j].shape[2]*(j+1)] = cur[0][j]
                img[:, cur[0][j].shape[1]:, cur[1][j].shape[2]*j:cur[1][j].shape[2]*(j+1)] = cur[1][j]
            name = '%s/%d/%d/%d-%d'%(fdir, i if cls < 2 else args.sample_size, ((2 if y else 0) + (1 if Qvalue[i, 1] > Qvalue[i, 0] else 0)), va, vb)
            cv2.imwrite('%s.png'%(name), tocv2(img))
            np.savetxt('%s.txt'%name, Qvalue)
            plot(Qvalue, i + 1, name)
        if terminal:
            break
        i += step
# --- Aggregate CMC and mAP from the pairwise score matrix vv ---
for i in xrange(N*2):
    # r = rank of the true match `a` among all gallery candidates
    a, r = i % N, 0
    for b in xrange(N):
        if a != b and vv[i][b] <= vv[i][a]:
            r += 1
    # CMC hit at each rank threshold in cmcn ([1, 5, 10, 20])
    for k in xrange(4):
        cmcs[k].append(1.0 if r < cmcn[k] else 0.0)
    vscores += [vs[i]]
    vturns += [vt[i]]
    # average precision per query (scores negated: higher vv = more similar)
    score = np.array([-vv[i][_] for _ in xrange(N)])
    label = np.array([(1 if _ == a else 0) for _ in xrange(N)])
    ap.append(average_precision_score(label, score))
    print 'ap', i, ap[-1]
# histogram of (turn index, final action code) over all pairs
cnt_map = [[0 for j in xrange(6)] for i in xrange(args.sample_size)]
for i in xrange(N):
    for j in xrange(N):
        cnt_map[fts[i][j][1]][fts[i][j][0]] += 1
241 |
242 |
--------------------------------------------------------------------------------
/baseline/baseline.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.insert(0, "mxnet/python/")
3 | import find_mxnet
4 | import mxnet as mx
5 | import logging
6 | import numpy as np
7 | import argparse
8 | from mxnet.optimizer import SGD
9 | import loss_layers
10 | import lsoftmax
11 | from verifi_iterator import verifi_iterator
12 | from even_iterator import Even_iterator
13 | import importlib
14 |
15 |
def build_network(symbol, num_id, batchsize):
    '''
    Attach the re-id heads to a backbone symbol.

    Pipeline: global average pooling -> flatten -> L2-normalized feature.
    The flattened feature feeds an identity classifier (LSoftmax or plain
    FC + softmax); the normalized feature feeds whichever metric-learning
    losses (verification / triplet / LMNN / center) are enabled on the
    global `args`.

    Returns a grouped symbol with one output per enabled head.
    '''
    pooled = mx.symbol.Pooling(
        data=symbol, kernel=(1, 1), global_pool=True,
        pool_type='avg', name='global_pool')
    feat = mx.symbol.Flatten(data=pooled, name='flatten')
    normed = mx.symbol.L2Normalization(data=feat, name='l2_norm')
    # dropout is currently disabled; metric losses use the normalized feature
    dropout = normed

    if args.lsoftmax:
        # large-margin softmax classifier (margin/beta schedule hard-coded)
        fc1 = mx.symbol.LSoftmax(data=feat, num_hidden=num_id, beta=1000,
                                 margin=4, scale=0.99999, beta_min=3,
                                 name='lsoftmax')
    else:
        fc1 = mx.symbol.FullyConnected(data=feat, num_hidden=num_id, name='cls_fc1')

    outputs = [mx.symbol.SoftmaxOutput(data=fc1, name='softmax')]

    if args.verifi:
        outputs.append(mx.symbol.Custom(
            data=dropout, grad_scale=1.0, threshd=args.verifi_threshd,
            op_type='verifiLoss', name='verifi'))
    if args.triplet:
        outputs.append(mx.symbol.Custom(
            data=dropout, grad_scale=1.0, threshd=args.triplet_threshd,
            op_type='tripletLoss', name='triplet'))
    if args.lmnn:
        outputs.append(mx.symbol.Custom(
            data=dropout, epsilon=0.1, threshd=0.9,
            op_type='lmnnLoss', name='lmnn'))
    if args.center:
        outputs.append(mx.symbol.Custom(
            data=dropout, op_type='centerLoss', name='center',
            num_class=num_id, alpha=0.5, scale=1.0, batchsize=batchsize))

    return mx.symbol.Group(outputs)
54 |
55 |
class Multi_Metric(mx.metric.EvalMetric):
    """Combined metric: accuracy for the first `cls` (softmax) outputs and
    mean raw loss value for every remaining (metric-loss) output."""

    def __init__(self, num=None, cls=1):
        super(Multi_Metric, self).__init__('multi-metric', num)
        # number of leading outputs that are classification heads
        self.cls = cls

    def update(self, labels, preds):
        # classification heads: count argmax hits against the labels
        for idx in range(self.cls):
            predicted = mx.nd.argmax_channel(preds[idx]).asnumpy().astype('int32')
            truth = labels[idx].asnumpy().astype('int32')
            mx.metric.check_label_shapes(truth, predicted)
            hits = (predicted.flat == truth.flat).sum()
            count = len(predicted.flat)
            if self.num is None:
                self.sum_metric += hits
                self.num_inst += count
            else:
                self.sum_metric[idx] += hits
                self.num_inst[idx] += count

        # remaining outputs are per-sample loss values: accumulate their sum
        for idx in range(self.cls, len(preds)):
            values = preds[idx].asnumpy()
            if self.num is None:
                self.sum_metric += np.sum(values)
                self.num_inst += len(values)
            else:
                self.sum_metric[idx] += np.sum(values)
                self.num_inst[idx] += len(values)
88 |
def get_imRecordIter(name, input_shape, batch_size, kv, shuffle=False, aug=False, even_iter=False):
    '''
    Build one half-batch data iterator.

    With even_iter=True an Even_iterator is built from the .lst file;
    otherwise an mx.io.ImageRecordIter reads the .rec file. Either way the
    iterator serves batch_size / 2 images: pairs are assembled later by
    verifi_iterator.
    '''
    if even_iter:
        aug_params = {
            'resize': 128,
            'rand_crop': aug,
            'rand_mirror': aug,
            'input_shape': input_shape,
            'mean': 128.0,
        }
        return Even_iterator(
            '%s/%s.lst' % (args.data_dir, name),
            batch_size=batch_size / 2,
            aug_params=aug_params,
            shuffle=shuffle)

    return mx.io.ImageRecordIter(
        path_imglist="%s/%s.lst" % (args.data_dir, name),
        path_imgrec="%s/%s.rec" % (args.data_dir, name),
        # per-channel mean subtraction (no mean image file)
        mean_r=128.0,
        mean_g=128.0,
        mean_b=128.0,
        rand_crop=aug,
        rand_mirror=aug,
        prefetch_buffer=4,
        preprocess_threads=3,
        shuffle=shuffle,
        label_width=1,
        round_batch=False,
        data_shape=input_shape,
        batch_size=batch_size / 2,
        num_parts=kv.num_workers,
        part_index=kv.rank)
127 |
128 |
def get_iterators(batch_size, input_shape, train, test, kv, gpus=1):
    '''
    Build the train and validation verifi_iterators.

    Each wraps a pair of half-batch iterators: an "_even" one (identity-
    balanced sampling when args.even_iter is set) and a "_rand" one.
    Training uses augmentation and shuffling; validation uses neither.
    '''
    def _paired(prefix, even_shuffle, rand_shuffle, do_aug):
        # one-line helper: build (even, rand) halves and wrap them
        even_half = get_imRecordIter(
            '%s_even' % prefix, input_shape, batch_size, kv,
            shuffle=even_shuffle, aug=do_aug, even_iter=args.even_iter)
        rand_half = get_imRecordIter(
            '%s_rand' % prefix, input_shape, batch_size, kv,
            shuffle=rand_shuffle, aug=do_aug)
        return verifi_iterator(
            even_half, rand_half, use_verifi=args.verifi,
            use_center=args.center, use_lsoftmax=args.lsoftmax, gpus=gpus)

    return _paired(train, args.even_iter, True, True), \
        _paired(test, False, False, False)
150 |
151 |
def parse_args():
    """Parse the command-line configuration for baseline re-id training."""
    parser = argparse.ArgumentParser(
        description='single domain car recog training')
    parser.add_argument('--gpus', type=str, default='5',
                        help='the gpus will be used, e.g "0,1"')
    parser.add_argument('--data-dir', type=str,
                        default="/data3/matt/iLIDS-VID/recs",
                        help='data directory')
    parser.add_argument('--num-examples', type=int, default=20000,
                        help='the number of training examples')
    parser.add_argument('--num-id', type=int, default=150,
                        help='the number of training ids')
    parser.add_argument('--batch-size', type=int, default=4,
                        help='the batch size')
    parser.add_argument('--lr', type=float, default=1e-2,
                        help='the initial learning rate')
    parser.add_argument('--num-epoches', type=int, default=1,
                        help='the number of training epochs')
    parser.add_argument('--mode', type=str, default='ilds_baseline_b4',
                        help='save names of model and log')
    parser.add_argument('--lsoftmax', action='store_true', default=False,
                        help='if use large margin softmax')
    parser.add_argument('--verifi-label', action='store_true', default=False,
                        help='if add verifi label')
    parser.add_argument('--verifi', action='store_true', default=False,
                        help='if use verifi loss')
    parser.add_argument('--triplet', action='store_true', default=False,
                        help='if use triplet loss')
    # NOTE(review): store_true with default=True means --lmnn is ALWAYS on
    # and cannot be disabled from the command line — confirm this is intended.
    parser.add_argument('--lmnn', action='store_true', default=True,
                        help='if use LMNN loss')
    parser.add_argument('--center', action='store_true', default=False,
                        help='if use center loss')
    parser.add_argument('--verifi-threshd', type=float, default=0.9,
                        help='verification threshold')
    parser.add_argument('--triplet-threshd', type=float, default=0.9,
                        help='triplet threshold')
    parser.add_argument('--train-file', type=str, default="image_train",
                        help='train file')
    parser.add_argument('--test-file', type=str, default="image_valid",
                        help='test file')
    parser.add_argument('--kv-store', type=str,
                        default='device', help='the kvstore type')
    parser.add_argument('--network', type=str,
                        default='inception-bn', help='network name')
    parser.add_argument('--model-load-epoch', type=int, default=126,
                        help='load the model on an epoch using the model-load-prefix')
    parser.add_argument('--model-load-prefix', type=str, default='inception-bn',
                        help='load model prefix')
    parser.add_argument('--even-iter', action='store_true', default=False,
                        help='if use even iterator')
    return parser.parse_args()
203 |
204 |
def load_checkpoint(prefix, epoch):
    """Load '<prefix>-<epoch>.params' and split the saved NDArrays into
    (arg_params, aux_params) dicts keyed by parameter name.

    Keys in the file look like 'arg:conv1_weight' / 'aux:bn1_moving_mean';
    any other prefix is ignored.
    """
    saved = mx.nd.load('%s-%04d.params' % (prefix, epoch))
    params = {'arg': {}, 'aux': {}}
    for key, value in saved.items():
        kind, name = key.split(':', 1)
        if kind in params:
            params[kind][name] = value
    return (params['arg'], params['aux'])
217 |
218 |
# --- Script entry: configure, build, and train the baseline model ---
args = parse_args()

print args
batch_size = args.batch_size
num_epoch = args.num_epoches
devices = [mx.gpu(int(i)) for i in args.gpus.split(',')]
lr = args.lr
num_images = args.num_examples


# pretrained backbone weights (e.g. ImageNet Inception-BN checkpoint)
arg_params, aux_params = load_checkpoint(
    'models/%s' % args.model_load_prefix, args.model_load_epoch)

# backbone symbol is loaded from symbol_<network>.py
symbol = importlib.import_module(
    'symbol_' + args.network).get_symbol()

#batchsize4center=batch_size / len(devices)
net = build_network(symbol, num_id=args.num_id, batchsize= batch_size)

kv = mx.kvstore.create(args.kv_store)
train, val = get_iterators(
    batch_size=batch_size, input_shape=(3, 224, 112),
    train=args.train_file, test=args.test_file, kv=kv, gpus=len(devices))
print train.batch_size
#train = get_imRecordIter(args.train_file, (3, 224, 112), batch_size, kv)
#val = get_imRecordIter(args.test_file, (3, 224, 112), batch_size, kv)

# step LR down 10x at epochs 50 and 75 (*2: each record is used in a pair)
stepPerEpoch = int(num_images * 2 / batch_size)
lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(
    step=[stepPerEpoch * x for x in [50, 75]], factor=0.1)
init = mx.initializer.Xavier(
    rnd_type='gaussian', factor_type='in', magnitude=2)

arg_names = net.list_arguments()
sgd = SGD(learning_rate=args.lr, momentum=0.9,
          wd=0.0005, clip_gradient=10, lr_scheduler=lr_scheduler,
          rescale_grad=1.0 / batch_size)


logging.basicConfig(filename='log/%s.log' % args.mode, level=logging.DEBUG)
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
logging.info(args)

print ('begining of mx.model.feedforward')

model = mx.model.FeedForward(
    symbol=net, ctx=devices, num_epoch=num_epoch, arg_params=arg_params,
    aux_params=aux_params, initializer=init, optimizer=sgd)

prefix = 'models/%s' % args.mode
# one metric slot per enabled network output (softmax + each extra loss)
num = 1
if args.verifi:
    num += 1
if args.triplet:
    num += 1
if args.lmnn:
    num += 1
if args.center:
    num += 1


eval_metric=Multi_Metric(num=num, cls=1)
epoch_end_callback=mx.callback.do_checkpoint(prefix)
batch_end_callback=mx.callback.Speedometer(batch_size=batch_size)
print ('begining of model.fit')
model.fit(X=train, eval_data=val, eval_metric=eval_metric, logger=logger, epoch_end_callback=epoch_end_callback, batch_end_callback=batch_end_callback)
print('done')
--------------------------------------------------------------------------------
/baseline/lsoftmax.py:
--------------------------------------------------------------------------------
1 | import os
2 | import math
3 | import mxnet as mx
4 | import numpy as np
5 |
6 |
7 | # MXNET_CPU_WORKER_NTHREADS must be greater than 1 for custom op to work on CPU
8 | os.environ['MXNET_CPU_WORKER_NTHREADS'] = '2'
9 |
10 |
class LSoftmaxOp(mx.operator.CustomOp):
    '''Large-margin softmax (L-Softmax) fully-connected operator.

    For the target class yi, the logit f = |w_yi||x_i|cos(theta) is replaced
    by |w_yi||x_i|((-1)^k cos(m*theta) - 2k), blended with the original logit
    via an annealed beta: out = (f_margin + beta*f) / (1 + beta).

    NOTE(review): relies on Python 2 integer division (`self.margin / 2`);
    under Python 3 these would need `//`.
    '''

    def __init__(self, margin, beta, beta_min, scale):
        self.margin = int(margin)
        self.beta = float(beta)          # annealing weight, decayed by `scale`
        self.beta_min = float(beta_min)  # floor for beta
        self.scale = float(scale)
        # binomial coefficients C(i, margin) and the cos boundaries of the
        # k-intervals used by find_k
        self.c_map = []
        self.k_map = []
        c_m_n = lambda m, n: math.factorial(n) / math.factorial(m) / math.factorial(n-m)
        for i in range(margin+1):
            self.c_map.append(c_m_n(i, margin))
            self.k_map.append(math.cos(i * math.pi / margin))

    def find_k(self, cos_t):
        '''find k for cos(theta)

        k indexes the interval [k*pi/m, (k+1)*pi/m] containing theta.
        '''
        # for numeric issue
        eps = 1e-5
        le = lambda x, y: x < y or abs(x-y) < eps
        for i in range(self.margin):
            if le(self.k_map[i+1], cos_t) and le(cos_t, self.k_map[i]):
                return i
        raise ValueError('can not find k for cos_t = %f'%cos_t)

    def calc_cos_mt(self, cos_t):
        '''calculate cos(m*theta) via the Chebyshev-style expansion in
        powers of cos(theta) and sin^2(theta)
        '''
        cos_mt = 0
        sin2_t = 1 - cos_t * cos_t
        flag = -1
        for p in range(self.margin / 2 + 1):  # Py2 integer division
            flag *= -1
            cos_mt += flag * self.c_map[2*p] * pow(cos_t, self.margin-2*p) * pow(sin2_t, p)
        return cos_mt

    def forward(self, is_train, req, in_data, out_data, aux):
        # inputs: features x, integer labels, weight matrix w
        assert len(in_data) == 3
        assert len(out_data) == 1
        assert len(req) == 1
        x, label, w = in_data
        x = x.asnumpy()
        w = w.asnumpy()
        label = label.asnumpy()
        #print "lsoftmax label", label
        eps= 1e-5
        # original fully connected
        out = x.dot(w.T)
        if is_train:
            # large margin fully connected: only the target-class logit of
            # each sample is modified
            n = label.shape[0]
            w_norm = np.linalg.norm(w, axis=1)
            x_norm = np.linalg.norm(x, axis=1)
            for i in range(n):
                j = yi = int(label[i])
                f = out[i, yi]
                cos_t = f / (w_norm[yi] * x_norm[i]+eps)
                # calc k and cos_mt
                k = self.find_k(cos_t)
                cos_mt = self.calc_cos_mt(cos_t)
                # f_i_j = (\beta * f_i_j + fo_i_j) / (1 + \beta)
                fo_i_j = f
                f_i_j = (pow(-1, k) * cos_mt - 2*k) * (w_norm[yi] * x_norm[i])
                #print j,yi,cos_t,k,cos_mt,fo_i_j,f_i_j
                out[i, yi] = (f_i_j + self.beta * fo_i_j) / (1 + self.beta)
        self.assign(out_data[0], req[0], mx.nd.array(out))

    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        assert len(in_data) == 3
        assert len(out_grad) == 1
        assert len(in_grad) == 3
        assert len(req) == 3
        x, label, w = in_data
        x = x.asnumpy()
        w = w.asnumpy()
        label = label.asnumpy()
        o_grad = out_grad[0].asnumpy()
        # original fully connected
        x_grad = o_grad.dot(w)
        w_grad = o_grad.T.dot(x)
        # large margin fully connected
        n = label.shape[0] # batch size
        m = w.shape[0] # number of classes
        margin = self.margin # margin
        feature_dim = w.shape[1] # feature dimension
        cos_t = np.zeros(n, dtype=np.float32) # cos(theta)
        cos_mt = np.zeros(n, dtype=np.float32) # cos(margin * theta)
        sin2_t = np.zeros(n, dtype=np.float32) # sin(theta) ^ 2
        fo = np.zeros(n, dtype=np.float32) # fo_i = dot(x_i, w_yi)
        k = np.zeros(n, dtype=np.int32)
        x_norm = np.linalg.norm(x, axis=1)
        w_norm = np.linalg.norm(w, axis=1)
        eps=1e-5
        # precompute the per-sample angular quantities
        for i in range(n):
            j = yi = int(label[i])
            f = w[yi].dot(x[i])
            cos_t[i] = f / (w_norm[yi] * x_norm[i]+eps)
            k[i] = self.find_k(cos_t[i])
            cos_mt[i] = self.calc_cos_mt(cos_t[i])
            sin2_t[i] = 1 - cos_t[i]*cos_t[i]
            fo[i] = f
        # gradient w.r.t. x_i
        for i in range(n):
            # df / dx at x = x_i, w = w_yi
            j = yi = int(label[i])
            dcos_dx = w[yi] / (w_norm[yi]*x_norm[i]+eps) - x[i] * fo[i] / (w_norm[yi]*pow(x_norm[i], 3)+eps)
            dsin2_dx = -2 * cos_t[i] * dcos_dx
            dcosm_dx = margin*pow(cos_t[i], margin-1) * dcos_dx # p = 0
            flag = 1
            for p in range(1, margin / 2 + 1):
                flag *= -1
                dcosm_dx += flag * self.c_map[2*p] * ( \
                    p*pow(cos_t[i], margin-2*p)*pow(sin2_t[i], p-1)*dsin2_dx + \
                    (margin-2*p)*pow(cos_t[i], margin-2*p-1)*pow(sin2_t[i], p)*dcos_dx)
            df_dx = (pow(-1, k[i]) * cos_mt[i] - 2*k[i]) * w_norm[yi] / (x_norm[i]+eps) * x[i] + \
                pow(-1, k[i]) * w_norm[yi] * x_norm[i] * dcosm_dx
            # alpha blends the margin branch with the plain-FC branch
            alpha = 1 / (1 + self.beta)
            x_grad[i] += alpha * o_grad[i, yi] * (df_dx - w[yi])
        # gradient w.r.t. w_j
        for j in range(m):
            dw = np.zeros(feature_dim, dtype=np.float32)
            for i in range(n):
                yi = int(label[i])
                if yi == j:
                    # df / dw at x = x_i, w = w_yi and yi == j
                    dcos_dw = x[i] / (w_norm[yi]*x_norm[i]+eps) - w[yi] * fo[i] / (x_norm[i]*pow(w_norm[yi], 3)+eps)
                    dsin2_dw = -2 * cos_t[i] * dcos_dw
                    dcosm_dw = margin*pow(cos_t[i], margin-1) * dcos_dw # p = 0
                    flag = 1
                    for p in range(1, margin / 2 + 1):
                        flag *= -1
                        dcosm_dw += flag * self.c_map[2*p] * ( \
                            p*pow(cos_t[i], margin-2*p)*pow(sin2_t[i], p-1)*dsin2_dw + \
                            (margin-2*p)*pow(cos_t[i], margin-2*p-1)*pow(sin2_t[i], p)*dcos_dw)
                    df_dw_j = (pow(-1, k[i]) * cos_mt[i] - 2*k[i]) * x_norm[i] / (w_norm[yi]+eps) * w[yi] + \
                        pow(-1, k[i]) * w_norm[yi] * x_norm[i] * dcosm_dw
                    dw += o_grad[i, yi] * (df_dw_j - x[i])
            alpha = 1 / (1 + self.beta)
            w_grad[j] += alpha * dw
        #x_grad[:], w_grad[:] = 0, 0
        self.assign(in_grad[0], req[0], mx.nd.array(x_grad))
        self.assign(in_grad[2], req[2], mx.nd.array(w_grad))
        # dirty hack, should also work for multi devices
        # (beta anneal happens once per backward call, per operator instance)
        self.beta *= self.scale
        self.beta = max(self.beta, self.beta_min)
158 |
159 |
@mx.operator.register("LSoftmax")
class LSoftmaxProp(mx.operator.CustomOpProp):
    """Symbol-level registration for LSoftmaxOp: declares the (data, label,
    weight) inputs, the single output, and their shapes."""

    def __init__(self, num_hidden, beta, margin, scale=1, beta_min=0):
        # need_top_grad=True: this is a mid-network layer, not a loss
        super(LSoftmaxProp, self).__init__(need_top_grad=True)
        self.margin = int(margin)
        self.num_hidden = int(num_hidden)
        self.beta = float(beta)
        self.beta_min = float(beta_min)
        self.scale = float(scale)

    def list_arguments(self):
        return ['data', 'label', 'weight']

    def list_outputs(self):
        return ['output']

    def infer_shape(self, in_shape):
        assert len(in_shape) == 3, "LSoftmaxOp input data: [data, label, weight]"
        dshape, lshape = in_shape[0], in_shape[1]
        assert len(dshape) == 2, "data shape should be (batch_size, feature_dim)"
        assert len(lshape) == 1, "label shape should be (batch_size,)"
        batch, feat_dim = dshape
        # weight: (num_hidden, feature_dim); output: (batch, num_hidden)
        return [dshape, lshape, (self.num_hidden, feat_dim)], \
            [(batch, self.num_hidden), ], []

    def create_operator(self, ctx, shapes, dtypes):
        return LSoftmaxOp(margin=self.margin, beta=self.beta,
                          beta_min=self.beta_min, scale=self.scale)
190 |
191 |
def test_op():
    """test LSoftmax Operator

    Numerically checks the analytic gradients of the LSoftmax op (python or
    cpp implementation, per cmd_args.op_impl) against finite differences of
    the loss 0.5*||output||^2.
    """
    # build symbol
    batch_size = cmd_args.batch_size
    embedding_dim = cmd_args.embedding_dim
    num_classes = cmd_args.num_classes
    data = mx.sym.Variable('data')
    label = mx.sym.Variable('label')
    weight = mx.sym.Variable('weight')
    args = {
        'data': np.random.normal(0, 1, (batch_size, embedding_dim)),
        'weight': np.random.normal(0, 1, (num_classes, embedding_dim)),
        'label': np.random.choice(num_classes, batch_size),
    }

    if cmd_args.op_impl == 'py':
        # NOTE(review): num_hidden is hard-coded to 10 here (the cpp branch
        # uses num_classes) — only consistent with the default --num-classes=10.
        symbol = mx.sym.Custom(data=data, label=label, weight=weight, num_hidden=10,
                               beta=cmd_args.beta, margin=cmd_args.margin, scale=cmd_args.scale,
                               op_type='LSoftmax', name='lsoftmax')
    else:
        symbol = mx.sym.LSoftmax(data=data, label=label, weight=weight, num_hidden=num_classes,
                                 margin=cmd_args.margin, beta=cmd_args.beta, scale=cmd_args.scale,
                                 name='lsoftmax')

    data_shape = (batch_size, embedding_dim)
    label_shape = (batch_size,)
    weight_shape = (num_classes, embedding_dim)
    ctx = mx.cpu() if cmd_args.op_impl == 'py' else mx.gpu()
    executor = symbol.simple_bind(ctx=ctx, data=data_shape, label=label_shape, weight=weight_shape)

    def forward(data, label, weight):
        # run one training-mode forward pass and return the raw output
        data = mx.nd.array(data, ctx=ctx)
        label = mx.nd.array(label, ctx=ctx)
        weight = mx.nd.array(weight, ctx=ctx)
        executor.forward(is_train=True, data=data, label=label, weight=weight)
        return executor.output_dict['lsoftmax_output'].asnumpy()

    def backward(out_grad):
        executor.backward(out_grads=[mx.nd.array(out_grad, ctx=ctx)])
        return executor.grad_dict

    def gradient_check(name, i, j):
        '''gradient check on x[i, j]

        Compares the analytic gradient of loss = 0.5*||forward()||^2 with a
        central finite difference.
        NOTE(review): args[name][i, j] is left shifted by +eps afterwards
        (never restored).
        '''
        eps = 1e-4
        threshold = 1e-2
        reldiff = lambda a, b: abs(a-b) / (abs(a) + abs(b))
        # calculate by backward
        output = forward(data=args['data'], weight=args['weight'], label=args['label'])
        grad_dict = backward(output)
        grad = grad_dict[name].asnumpy()[i, j]
        # calculate by \delta f / 2 * eps
        loss = lambda x: np.square(x).sum() / 2
        args[name][i, j] -= eps
        loss1 = loss(forward(data=args['data'], weight=args['weight'], label=args['label']))
        args[name][i, j] += 2 * eps
        loss2 = loss(forward(data=args['data'], weight=args['weight'], label=args['label']))
        grad_expect = (loss2 - loss1) / (2 * eps)
        # check
        rel_err = reldiff(grad_expect, grad)
        if rel_err > threshold:
            print 'gradient check failed'
            print 'expected %lf given %lf, relative error %lf'%(grad_expect, grad, rel_err)
            return False
        else:
            print 'gradient check pass'
            return True

    # test forward
    output = forward(data=args['data'], weight=args['weight'], label=args['label'])
    diff = args['data'].dot(args['weight'].T) - output

    # test backward
    # gradient check on data
    data_gc_pass = 0
    for i in range(args['data'].shape[0]):
        for j in range(args['data'].shape[1]):
            print 'gradient check on data[%d, %d]'%(i, j)
            if gradient_check('data', i, j):
                data_gc_pass += 1
    # gradient check on weight
    weight_gc_pass = 0
    for i in range(args['weight'].shape[0]):
        for j in range(args['weight'].shape[1]):
            print 'gradient check on weight[%d, %d]'%(i, j)
            if gradient_check('weight', i, j):
                weight_gc_pass += 1
    print '===== Summary ====='
    print 'gradient on data pass ratio is %lf'%(float(data_gc_pass) / args['data'].size)
    print 'gradient on weight pass ratio is %lf'%(float(weight_gc_pass) / args['weight'].size)
283 |
284 |
# Self-test entry point: parse test parameters and run the gradient check.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--batch-size', type=int, default=32, help="test batch size")
    parser.add_argument('--num-classes', type=int, default=10, help="test number of classes")
    parser.add_argument('--embedding-dim', type=int, default=3, help="test embedding dimension")
    parser.add_argument('--margin', type=int, default=2, help="test lsoftmax margin")
    parser.add_argument('--beta', type=float, default=10, help="test lsoftmax beta")
    parser.add_argument('--scale', type=float, default=1, help="beta scale of every mini-batch")
    parser.add_argument('--op-impl', type=str, choices=['py', 'cpp'], default='py', help="test op implementation")
    cmd_args = parser.parse_args()
    print cmd_args

    # check: skip gracefully when mxnet was built without the cpp operator
    if cmd_args.op_impl == 'cpp':
        try:
            op_creator = mx.sym.LSoftmax
        except AttributeError:
            print 'No cpp operator for LSoftmax, Skip test'
            import sys
            sys.exit(0)

    test_op()
309 |
--------------------------------------------------------------------------------
/baseline/loss_layers.py:
--------------------------------------------------------------------------------
1 | import mxnet as mx
2 | import numpy as np
3 |
4 |
class VerfiLoss(mx.operator.CustomOp):
    '''
    Verification (contrastive) loss layer.

    Per sample i: mean squared distance to its positives plus mean squared
    hinge (threshd - d)+ to its negatives. Positive pairing is positional:
    samples in the first half of the batch are paired (0,1), (2,3), ...;
    the label input is unused at runtime.
    NOTE(review): assumes this exact batch layout from verifi_iterator —
    confirm against the iterator.
    '''
    def __init__(self, grad_scale, threshd):
        self.grad_scale = grad_scale  # multiplier applied to the gradient
        self.threshd = threshd        # negative-pair margin
        self.eps = 1e-5               # numeric guard for divisions

    def forward(self, is_train, req, in_data, out_data, aux):
        # print "forward"
        x = in_data[0]
        label = in_data[1].asnumpy()
        #print "verifi label", label
        n = x.shape[0]
        ctx = x.context
        # y = out_data[0]
        # y[:] = 0
        # print y.shape
        # per-sample scalar loss
        y = np.zeros((x.shape[0], ))
        #y = mx.nd.array((n, ), ctx=ctx)
        for i in range(x.shape[0]):
            #print "forward", i
            # mask marks i and its positional positive partner (first half only)
            mask = np.zeros((n, ))
            if i<(x.shape[0]/2):
                pid = i + 1 if i % 2 == 0 else i - 1
                mask[i] = 1
                mask[pid] = 1
            #mask[np.where(label == label[i])] = 1
            #print mask
            pos = np.sum(mask)
            mask = mx.nd.array(mask, ctx=ctx)
            diff = x[i] - x
            # d: Euclidean distance from x[i] to every sample
            d = mx.nd.sqrt(mx.nd.sum(diff * diff, axis=1))
            # d1: hinge for negatives, active only inside the margin
            d1 = mx.nd.maximum(0, self.threshd - d)
            z = mx.nd.sum(mask * d * d) / (pos + self.eps) \
                + mx.nd.sum((1 - mask) * d1 * d1) / (n - pos + self.eps)
            y[i] = z.asnumpy()[0]

        # y /= x.shape[0]
        self.assign(out_data[0], req[0], mx.nd.array(y, ctx=ctx))

    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        # print "backward"
        x = in_data[0]
        #label = in_data[1].asnumpy()
        n = x.shape[0]
        ctx = x.context
        grad = in_grad[0]
        grad[:] = 0
        for i in range(x.shape[0]):
            # same positional-pair mask as in forward (row vector here)
            mask = np.zeros((1, n))
            #mask[np.where(label == label[i])] = 1
            if i<(x.shape[0]/2):
                pid = i + 1 if i % 2 == 0 else i - 1
                mask[0,i] = 1
                mask[0,pid] = 1
            pos = np.sum(mask)
            mask = mx.nd.array(mask, ctx=ctx)
            diff = x[i] - x
            d = mx.nd.sqrt(mx.nd.sum(diff * diff, axis=1))
            # g1 is non-zero (negative) only for negatives inside the margin
            g1 = mx.nd.minimum(0, (d - self.threshd) / (d + self.eps))
            z = mx.nd.dot((1 - mask) * g1.reshape([1, n]), diff)[0]
            # print grad[i].shape, z.shape
            # grad[i] = z
            # print "z"
            # gradient: pull positives together, push in-margin negatives apart
            grad[i] = mx.nd.dot(mask, diff)[0] / (pos + self.eps)\
                + mx.nd.dot((1 - mask) * g1.reshape([1, n]), diff)[0] / (n - pos + self.eps)

        grad *= self.grad_scale
75 |
76 |
77 |
@mx.operator.register("verifiLoss")
class VerifiLossProp(mx.operator.CustomOpProp):
    """Registration for VerfiLoss: (data, label) in, one per-sample scalar
    loss out."""

    def __init__(self, grad_scale=1.0, threshd=0.5):
        # need_top_grad=False: terminal loss layer
        super(VerifiLossProp, self).__init__(need_top_grad=False)
        self.grad_scale = float(grad_scale)
        self.threshd = float(threshd)

    def list_arguments(self):
        return ['data', 'label']

    def list_outputs(self):
        return ['output']

    def infer_shape(self, in_shape):
        # label and output both share the batch dimension of data
        batch = in_shape[0][0]
        return [in_shape[0], (batch, )], [(batch, )]

    def create_operator(self, ctx, shapes, dtypes):
        return VerfiLoss(self.grad_scale, self.threshd)
99 |
100 |
class TripletLoss(mx.operator.CustomOp):
    '''
    Triplet loss layer.

    Anchor i (first half of the batch) pairs positionally with positive
    pid = i+-1 and negative nid = i + batch/2:
    loss_i = max(0, ||x_i - x_pid||^2 - ||x_i - x_nid||^2 + threshd).
    NOTE(review): assumes this batch layout from the pairing iterator —
    confirm against verifi_iterator.
    '''
    def __init__(self, grad_scale=1.0, threshd=0.5):
        self.grad_scale = grad_scale  # multiplier applied to the gradient
        self.threshd = threshd        # triplet margin

    def forward(self, is_train, req, in_data, out_data, aux):
        x = in_data[0]
        # per-sample loss; second half of the batch stays zero
        y = np.zeros((x.shape[0], ))
        ctx = x.context
        for i in range(x.shape[0] / 2):  # Py2 integer division
            pid = i + 1 if i % 2 == 0 else i - 1
            nid = i + int(x.shape[0] / 2)
            pdiff = x[i] - x[pid]
            ndiff = x[i] - x[nid]
            # hinge on squared distances with margin threshd
            y[i] = mx.nd.sum(pdiff * pdiff).asnumpy()[0] -\
                mx.nd.sum(ndiff * ndiff).asnumpy()[0] + self.threshd
            if y[i] < 0:
                y[i] = 0
        # y /= x.shape[0]
        self.assign(out_data[0], req[0], mx.nd.array(y, ctx=ctx))

    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        x = in_data[0]
        y = out_data[0]
        grad = in_grad[0]
        grad[:] = 0
        for i in range(x.shape[0] / 2):
            pid = i + 1 if i % 2 == 0 else i - 1
            nid = i + int(x.shape[0] / 2)

            # gradient flows only through active (y > 0) triplets
            if y[i] > 0:
                grad[i] += x[nid] - x[pid]
                grad[pid] += x[pid] - x[i]
                grad[nid] += x[i] - x[nid]

        grad *= self.grad_scale
140 |
141 |
142 |
@mx.operator.register("tripletLoss")
class TripletLossProp(mx.operator.CustomOpProp):
    """Registration for TripletLoss: data in, one per-sample scalar loss out
    (no label input — pairing is positional)."""

    def __init__(self, grad_scale=1.0, threshd=0.5):
        # need_top_grad=False: terminal loss layer
        super(TripletLossProp, self).__init__(need_top_grad=False)
        self.grad_scale = float(grad_scale)
        self.threshd = float(threshd)

    def list_arguments(self):
        return ['data']

    def list_outputs(self):
        return ['output']

    def infer_shape(self, in_shape):
        batch = in_shape[0][0]
        return [in_shape[0]], [(batch, )]

    def create_operator(self, ctx, shapes, dtypes):
        return TripletLoss(self.grad_scale, self.threshd)
164 |
165 |
class CenterLoss(mx.operator.CustomOp):
    """Center loss: penalizes the distance between each feature and the
    running center of its class, and updates the centers during backward.

    Aux states (declared by CenterLossProp):
        aux[0] diff   -- per-sample (feature - center) residuals
        aux[1] center -- per-class center vectors
        aux[2] sum_   -- scratch buffer for center updates
    """

    def __init__(self, ctx, shapes, dtypes, num_class, alpha, scale=1.0):
        if not len(shapes[0]) == 2:
            # BUG FIX: was `raise ValuerError` — an undefined name, which
            # produced a NameError instead of the intended ValueError.
            raise ValueError('dim for input_data should be 2 for CenterLoss')

        self.alpha = alpha              # center update rate
        self.batch_size = shapes[0][0]
        self.num_class = num_class
        self.scale = scale              # gradient scale

    def forward(self, is_train, req, in_data, out_data, aux):
        labels = in_data[1].asnumpy()
        diff = aux[0]
        center = aux[1]

        # diff_i = x_i - center[label_i]
        for i in range(self.batch_size):
            diff[i] = in_data[0][i] - center[int(labels[i])]

        # 0.5 * ||diff_i||^2, averaged over the batch
        loss = mx.nd.sum(mx.nd.square(diff), axis=1) / self.batch_size / 2
        self.assign(out_data[0], req[0], loss)

    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        diff = aux[0]
        center = aux[1]
        sum_ = aux[2]

        # d(loss)/dx_i = diff_i, scaled
        grad_scale = float(self.scale / self.batch_size)
        self.assign(in_grad[0], req[0], diff * grad_scale)

        # update each class center toward the mean residual of its samples
        labels = in_data[1].asnumpy()
        label_occur = dict()
        for i, label in enumerate(labels):
            label_occur.setdefault(int(label), []).append(i)

        for label, sample_index in label_occur.items():
            sum_[:] = 0
            for i in sample_index:
                sum_ = sum_ + diff[i]
            # damped mean (1 + count) as in the original center-loss paper
            delta_c = sum_ / (1 + len(sample_index))
            center[label] += self.alpha * delta_c
211 |
@mx.operator.register("centerLoss")
class CenterLossProp(mx.operator.CustomOpProp):
    """Property class registering the custom center-loss operator."""

    def __init__(self, num_class, alpha, scale=1.0, batchsize=32):
        super(CenterLossProp, self).__init__(need_top_grad=False)
        self.num_class = int(num_class)
        self.alpha = float(alpha)
        self.scale = float(scale)
        self.batchsize = int(batchsize)

    def list_arguments(self):
        return ['data', 'label']

    def list_outputs(self):
        return ['output']

    def list_auxiliary_states(self):
        # Persistent buffers: per-sample residuals, class centers, scratch sum.
        return ['diff_bias', 'center_bias', 'sum_bias']

    def infer_shape(self, in_shape):
        data_shape = in_shape[0]
        feat_dim = data_shape[1]
        label_shape = (in_shape[0][0], )
        output_shape = (in_shape[0][0], )     # one scalar loss per row

        diff_shape = [self.batchsize, feat_dim]    # same layout as input batch
        center_shape = [self.num_class, feat_dim]  # one center per class
        sum_shape = [feat_dim, ]                   # accumulation scratch

        return [data_shape, label_shape], [output_shape], [diff_shape, center_shape, sum_shape]

    def create_operator(self, ctx, shapes, dtypes):
        return CenterLoss(ctx, shapes, dtypes, self.num_class, self.alpha, self.scale)
250 |
251 |
252 |
class lmnnLoss(mx.operator.CustomOp):
    '''
    LMNN Loss Layer = positive pairwise loss + triplet loss

    The batch is assumed to be arranged in positive pairs: rows (0,1),
    (2,3), ... share an identity, so row i's positive partner is i+1 for
    even i and i-1 for odd i; every other row is treated as a negative.
    Only the first half of the batch gets a loss value.

    NOTE(review): this code relies on Python 2 integer division
    (x.shape[0]/2, batchsize/2); under Python 3 '/' would yield a float.
    '''
    def __init__(self, epsilon, threshd):
        self.epsilon= epsilon #epsilon is the trade-off parameter between positive pairwise and triplet loss(1: epsilon)
        self.threshd = threshd  # hinge margin for the triplet term
        #self.pnr = pnr

    def forward(self, is_train, req, in_data, out_data, aux):
        # print "forward"
        x = in_data[0]  # embedding batch, shape (batch, d)
        #label=in_data[1].asnumpy()
        ctx = x.context
        # per-row loss; rows in the second half of the batch stay zero
        y = mx.nd.zeros((x.shape[0], ), ctx=ctx)
        halfsize = x.shape[0]/2
        for i in range(halfsize):
            # positive partner inside the (even, odd) pair
            pid = i + 1 if i % 2 == 0 else i - 1
            pdiff = x[i] - x[pid]
            # half squared distance to the positive partner
            pdist = 0.5*mx.nd.sum(pdiff * pdiff)
            mask = np.ones((x.shape[0],)) #index mask for negative examples
            mask[i] = 0
            mask[pid] = 0
            mask = mx.nd.array(mask, ctx=ctx)
            ndiff = x[i] - x
            # half squared distance from row i to every row in the batch
            ndist = 0.5*mx.nd.sum(ndiff*ndiff,axis=1)
            # hinge: positive should beat each negative by at least threshd;
            # averaged over the masked (negative) rows
            distdiff = (pdist - ndist +self.threshd)*mask
            distdiff = mx.nd.sum(mx.nd.maximum(0, distdiff))/mx.nd.sum(mask)
            # pairwise pull + epsilon-weighted triplet push
            y[i] = pdist+self.epsilon*distdiff

        self.assign(out_data[0], req[0], y)


    def backward(self, req, out_grad, in_data, out_data, in_grad, aux):
        # print "backward"
        x = in_data[0]
        #label = in_data[1].asnumpy()
        ctx = x.context
        grad = in_grad[0]
        grad[:] = 0  # accumulate gradients in place
        batchsize = x.shape[0]
        #label = in_data[1]
        #xhalf=x[halfsize:x.shape[0]]

        for i in range(batchsize/2):
            #print "gradient computation", i
            pid = i + 1 if i % 2 == 0 else i - 1
            # gradient of the pairwise pull term pdist = 0.5||x_i - x_pid||^2
            grad[i] += x[i] - x[pid]
            grad[pid] += x[pid] - x[i]

            #pnr_index = np.random.binomial(n=1, p=self.pnr/batchsize, size=batchsize)
            #print pnr_index
            mask = np.ones((batchsize,)) #index mask for negative examples
            mask[i] = 0
            mask[pid] = 0
            #mask=mask * pnr_index
            #print mask

            pdiff = x[i] - x[pid]
            pdist = 0.5 * mx.nd.sum(pdiff * pdiff)
            ndiff = x[i] - x
            ndist = 0.5 * mx.nd.sum(ndiff * ndiff,axis=1)
            distdiff = pdist - ndist + self.threshd

            # indicator of active (margin-violating) negatives
            index = np.zeros((batchsize, ))
            index[np.where(distdiff.asnumpy()>0)]=1
            index=index * mask
            index=mx.nd.array(index,ctx=ctx)
            #print index

            # per-negative weight, normalized by the total violation
            # (+1e-5 guards against division by zero when nothing is active)
            ratio = distdiff * index / (mx.nd.sum(distdiff * index)+1e-5)
            ratio = mx.nd.Reshape(ratio, shape=(batchsize,1))
            #print ratio.asnumpy()
            # broadcast the weight across the feature dimension
            ratio = mx.nd.broadcast_axis(ratio, axis=1, size=x.shape[1])
            #print ratio.asnumpy()

            # triplet-term gradients for the anchor, its positive, and all
            # active negatives, each scaled by epsilon
            grad[i] += mx.nd.sum((x-x[pid]) * ratio, axis=0) * self.epsilon
            grad[pid] += (x[pid]-x[i]) * self.epsilon * (mx.nd.sum(distdiff * index)/(mx.nd.sum(distdiff * index)+1e-5))
            grad += (x[i]-x) * ratio * self.epsilon

        self.assign(in_grad[0], req[0], grad)
334 |
@mx.operator.register("lmnnLoss")
class lmnnLossProp(mx.operator.CustomOpProp):
    """Property class registering the custom LMNN-loss operator."""

    def __init__(self, epsilon=1.0, threshd=0.5):
        # The loss computes its own gradient; no top gradient required.
        super(lmnnLossProp, self).__init__(need_top_grad=False)
        self.epsilon = float(epsilon)
        self.threshd = float(threshd)
        #self.pnr = float(pnr) #positive examples:negetive examples=1:pnr

    def list_arguments(self):
        return ['data'] # 'label']

    def list_outputs(self):
        return ['output']

    def infer_shape(self, in_shape):
        # One scalar loss per batch row.
        batch = in_shape[0][0]
        return [in_shape[0]], [(batch, )]

    def create_operator(self, ctx, shapes, dtypes):
        return lmnnLoss(self.epsilon, self.threshd)
357 |
358 |
359 |
360 |
361 |
362 |
363 |
364 |
365 |
366 |
367 |
368 |
369 |
370 |
--------------------------------------------------------------------------------
/RL/dqn_mars.py:
--------------------------------------------------------------------------------
1 | import sys
2 | #sys.path.insert(0, "mxnet/python/")
3 | import find_mxnet
4 | import mxnet as mx
5 |
6 | from tensorboard import SummaryWriter
7 | import logging
8 | import numpy as np
9 | import argparse
10 | import random
11 | import math
12 |
13 | from batch_provider_mars import BatchProvider
14 | from utils import get_imRecordIter
15 | from replay_memory import ReplayMemory
16 | from tb_system import TensorBoardSystem
17 | from agent import Agent
18 | import glob
19 |
20 |
def parse_args():
    """Parse command-line options for MARS DQN training.

    Returns the argparse.Namespace holding all hyper-parameters; the
    defaults reproduce the original configuration exactly.
    """
    parser = argparse.ArgumentParser(description='multishot recog training')
    add = parser.add_argument
    add('--gpus', type=str, default='1', help='the gpus will be used, e.g "0,1"')
    add('--data-dir', type=str, default="/data3/matt/MARS", help='data directory')
    add('--num-examples', type=int, default=10000, help='the number of training examples')
    add('--num-id', type=int, default=624, help='the number of training ids')
    add('--batch-size', type=int, default=16, help='the batch size')
    add('--sample-size', type=int, default=4, help='sample frames from each video')
    add('--patch-size', type=int, default=4, help='size of single image patch from video')
    add('--lr', type=float, default=1e-2, help='the initial learning rate')
    add('--num-epoches', type=int, default=100, help='the number of training epochs')
    add('--mode', type=str, default='prid_video_match_%d-%d' % (4, 4), help='save names of model and log')
    add('--verifi-threshd', type=float, default=0.9 + 2.3, help='verification threshold')
    add('--kv-store', type=str, default='device', help='the kvstore type')
    add('--network', type=str, default='inception-bn', help='network name')
    add('--model-load-epoch', type=int, default=1, help='load the model on an epoch using the model-load-prefix')
    add('--model-load-prefix', type=str, default='mars_baseline_b4', help='load model prefix')
    add('--q_duel', action='store_true', default=False, help='if use duel network')
    add('--q_double', action='store_true', default=False, help='if use double DQN')
    add('--q-weight', type=float, default=1.0, help='DQN loss weight')
    add('--q-gamma', type=float, default=0.99, help='DQN decay rate')
    add('--penalty', type=float, default=0.1, help='DQN unsure penalty rate')
    add('--ob-epochs', type=int, default=1, help='DQN observing epochs')
    add('--num_acts', type=int, default=3, help='number of actions')
    add('--acts_per_round', type=int, default=3, help='number of actions per round')
    add('--fix_gamma', action='store_true', default=False, help='if fix_gamma in bn')
    add('--fix_penalty', action='store_true', default=False, help='if fix penalty')
    add('--no_sim', action='store_true', default=False, help='if no sim net')
    add('--num_hidden', type=int, default=128, help='number of hidden neurons in Q learning fc layers')
    add('--target_freq', type=int, default=500, help='number of hidden neurons in Q learning fc layers')
    add('--tisr', type=int, default=1, help='time inverse lr step')
    add('--num_sim', type=int, default=128, help='number of hidden neurons in similarity network')
    add('--lr_step', type=str, default='100,200', help='number of epoches to shrink lr')
    add('--q_bn', action='store_true', default=False, help='if add bn in qnet')
    add('--maxout', action='store_true', default=False, help='if add maxout in qnet')
    add('--pr_alpha', type=float, default=0.6, help='prioritized-replay alpha')
    add('--pr_beta', type=float, default=0.4, help='prioritized-replay beta')
    add('--add_rewards', action='store_true', default=False, help='if add rewards for single agent')
    add('--epsilon', action='store_true', default=False, help='if epsilon learning')
    add('--pos_weight', type=float, default=1.0, help='positive rewards weight')
    add('--e2e', action='store_true', default=False, help='if e2e')
    add('--history', action='store_true', default=False, help='if use history')
    add('--optimizer', type=str, default='sgd', help='choose the optimizer in {sgd, adam, rms}')
    add('--memory_size', type=int, default=1000, help='memory buffer size')
    add('--final_epsilon', type=float, default=0.1, help='final epsilon for exploration')
    add('--exp_ratio', type=float, default=0.1, help='ratio for exploration in whole training process')
    add('--hinge', action='store_true', default=False, help='if use hinge loss')
    add('--train-set', type=str, default='image_valid', help='load model prefix')
    add('--valid-set', type=str, default='image_test', help='load model prefix')
    add('--min-states', type=int, default=4, help='minimum states for history')
    add('--min-imgs', type=int, default=1, help='minimum imgs for each state')
    add('--precomputed', action='store_true', default=False, help='if feature precomputed')
    add('--fusion', action='store_true', default=False, help='if use data fusion')
    add('--total-forward', action='store_true', default=False, help='if use data fusion')
    add('--verbose', action='store_true', default=False, help='if print debug info')
    add('--avg-dqn-k', type=int, default=5, help='number of target networks for avg-dqn')
    return parser.parse_args()
134 |
135 |
# --- global experiment setup (executed at import time) ---
args = parse_args()
# Log file name follows the experiment "mode" tag.
logging.basicConfig(filename='log/%s.log' % args.mode, level=logging.DEBUG)
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
logging.info(args)
logdir = './tblog/' + args.mode
summary_writer = SummaryWriter(logdir)                   # TensorBoard training log
monitor_writer = SummaryWriter('./molog/' + args.mode)   # secondary monitoring log
print args
batch_size = args.batch_size
num_epoch = args.num_epoches
# One mx context per requested GPU id; devices[0] hosts the agent.
devices = [mx.gpu(int(i)) for i in args.gpus.split(',')]
lr = args.lr

agent = Agent(args, devices[0])


prefix = 'models/%s' % args.mode   # checkpoint path prefix

# NOTE(review): `memory` is re-created further down (after data loading),
# so this instance is never actually used.
memory = ReplayMemory(replay_size=args.memory_size, alpha=args.pr_alpha)
tbs_V = TensorBoardSystem('mars', summary_writer)
157 |
158 |
def get_feat(dataset, is_train=False, max_id=1501, max_cam=7):
    """Build nested image-record iterators for a MARS split.

    Scans `<data_dir>/recs/<dataset>/id_<i>_<j>*` for person ids
    0..max_id-1 and cameras 1..max_cam-1, and returns a list over ids,
    each a list over cameras, each a list of record iterators (one per
    matching rec file). Ids or cameras with no rec files are skipped.

    dataset  -- split subdirectory name, e.g. 'train'
    is_train -- enables shuffling and augmentation in the iterators
    max_id   -- exclusive upper bound on person ids (default keeps the
                original hard-coded 1501)
    max_cam  -- exclusive upper bound on camera ids (default keeps the
                original hard-coded 7)
    """
    ret = []
    for i in xrange(max_id):
        cur = []
        for j in xrange(1, max_cam):
            images = glob.glob('%s/recs/%s/id_%d_%d*' % (args.data_dir, dataset, i, j))
            if len(images) == 0:
                continue
            cam = []
            for k in images:
                # BUG FIX: the original opened each rec file and counted its
                # lines into an unused variable, leaking the file handle;
                # that dead code has been removed.
                org_iter = get_imRecordIter(
                    args, k[len(args.data_dir)+1:-4], (3, 224, 112), 1,
                    shuffle=is_train, aug=is_train, even_iter=True)
                cam.append(org_iter)
            if len(cam) > 0:
                cur.append(cam)
        if len(cur) > 0:
            ret.append(cur)
    return ret
181 |
trainF = get_feat('train', True)   # nested [id][camera][rec-iter] training data

train = BatchProvider(trainF, True, args.sample_size, sample_ratio=0.5, need_feat=args.history)
batch_size = args.batch_size
N = args.num_id                    # number of distinct training identities

iterations = args.num_examples     # episodes per epoch
memory = ReplayMemory(replay_size=args.memory_size, alpha=args.pr_alpha)
# Epsilon-greedy schedule: decay linearly from 1.0 to final_epsilon over the
# first exp_ratio fraction of the epochs, one decrement per episode.
epsilon = 1.0
final_epsilon = args.final_epsilon
rand_ep, fix_ep = 0, int(args.num_epoches * args.exp_ratio)
epsilon_shr = (epsilon - final_epsilon) / (fix_ep - rand_ep) / iterations
max_penalty = 1                    # penalty magnitude for wrong/overrun decisions
195 |
# Main training loop: each episode samples a video pair, rolls out the agent's
# decisions (same / different / request-more-frames), stores the trajectory in
# the replay memory, then trains on a prioritized replay mini-batch.
for e in xrange(args.num_epoches):
    if args.verbose:
        print 'Epoch', e
    for batch in xrange(iterations):
        if args.verbose:
            print 'Epoch', e, 'batch', batch
        cur, a, b = train.provide()
        # ground truth: same identity iff ids agree modulo N
        y = ((a %N) == (b % N))
        data_batch = agent.wash_data(cur)
        Qvalue = agent.get_Qvalue(data_batch, use_target=False, is_train=False)
        if args.verbose:
            print 'forward', Qvalue
            qs = agent.Q.get_outputs()[1].asnumpy()
            print qs
            print qs.max(), qs.min(), qs.mean(), qs.std()
        # Boltzmann exploration distribution (temperature scales with epsilon)
        Qvalue_softmax = mx.nd.SoftmaxActivation(mx.nd.array(Qvalue, ctx=devices[0]) / epsilon / 5).asnumpy()
        # pad the first min_imgs steps with dummy reward/action entries
        reward, action, i = [0 for _ in xrange(args.min_imgs)], [-1 for _ in xrange(args.min_imgs)], args.min_imgs
        while i < args.sample_size:
            if args.total_forward:
                # always ask for more frames until the last step, then decide
                if i + 1 < args.sample_size:
                    k = 2
                else:
                    Q_choice = np.argmax(Qvalue[i, :2]) if args.epsilon else np.random.choice(args.num_acts, 1, p=Qvalue_softmax[i, :2])[0]
                    if random.random() <= epsilon and args.epsilon:
                        k = random.randrange(2)
                    else:
                        k = Q_choice
            else:
                # epsilon-greedy (if --epsilon) or softmax sampling over all actions
                Q_choice = np.argmax(Qvalue[i]) if args.epsilon else np.random.choice(args.num_acts, 1, p=Qvalue_softmax[i])[0]
                if random.random() <= epsilon and args.epsilon:
                    k = random.randrange(args.num_acts)
                else:
                    k = Q_choice
        # cls: 0/1 = declare different/same, >=2 = request more frames
        cls = k % args.acts_per_round
        step = k - 1
        if cls >= 2:
            if i + step >= args.sample_size:
                # ran out of frames without deciding
                r = -max_penalty
                terminal = True
            else:
                # small penalty for asking for more evidence
                r = -args.penalty * (2.0 - (0.5 ** (step - 1)))
                terminal = False
        else:
            # terminal decision: +1 if correct, -max_penalty otherwise
            r = 1 if cls == y else -max_penalty #(-10 if y else -10)
            terminal = True
            if args.pos_weight > 1:
                if y:
                    r *= args.pos_weight
                else:
                    if not y:
                        r /= args.pos_weight
            reward.append(r)
            action.append(k)
            if args.verbose:
                print i, (a, b), Qvalue[i], k, (y, cls), r
            tbsQvalue = np.zeros(3)
            tbsQvalue[min(2, cls)] = Qvalue[i, k]
            tbs_V.put_board(tbsQvalue, min(2, cls), y, r, epsilon, i + 1, dummy=False)
            if terminal:
                break
            i += step
        memory.add(dict(cur = cur, reward=reward, action=action, y=y, cnt=1))
        # anneal epsilon during the exploration phase
        if rand_ep <= e < fix_ep:
            epsilon -= epsilon_shr
            epsilon = max(epsilon, final_epsilon)
        # warm-up: collect experience before training starts
        if e * args.num_examples + batch < 50:#args.num_examples / 2:
            continue

        replays, idxes, weights = memory.sample(args.batch_size, args.pr_beta)
        new_weights = []
        for b in xrange(args.batch_size):
            cur, reward, action, y = replays[b]['cur'], replays[b]['reward'], replays[b]['action'], replays[b]['y']
            data_batch, delta_sum = agent.wash_data(cur), 0
            Qvalue = agent.get_Qvalue(data_batch, use_target=False, is_train=True)
            grad, r, grad_norm = np.zeros((args.sample_size, args.num_acts)), 0, 0
            # t walks to the frame index of the last action...
            t = args.min_imgs
            for i in xrange(len(action) - 1):
                t += action[i] - 1
            # ...then Q-targets are propagated backwards through the trajectory
            for i in xrange(len(action) - 1, -1, -1):
                if i < len(action) - 1:
                    r = reward[i] + args.q_gamma * max(last_Q)#min(1.0, max(last_Q))
                else:
                    r = reward[i]
                last_Q = Qvalue[t]
                if args.verbose:
                    print i, t, action[i], y, Qvalue[t], r,
                # TD error (sign flipped: gradient of 0.5*(Q - r)^2 wrt Q)
                delta = -r + Qvalue[t, action[i]]
                if not args.total_forward:
                    delta /= len(action)
                # clip to [-1, 1]
                if abs(delta) > 1:
                    delta /= abs(delta)
                if args.hinge:
                    # NOTE(review): `action` here is the whole list, so
                    # `action == 1` compares a list against an int and is
                    # always False; this probably meant `action[i]` — confirm.
                    if (y and action == 1 or not y and action == 0) and delta > 0:
                        clipped_delta = 0
                    elif (y and action == 0 or not y and action == 1) and delta < 0:
                        clipped_delta = 0
                    else:
                        clipped_delta = delta
                else:
                    clipped_delta = delta
                grad[t, action[i]] = clipped_delta
                grad_norm += (clipped_delta) * (clipped_delta)
                if args.verbose:
                    # NOTE(review): prints grad[i] while the update went into
                    # grad[t]; likely a debug-output slip.
                    print delta, grad[i]
                delta_sum += abs(delta)
                if i > 0:
                    t -= (action[i - 1] - 1)
                if args.total_forward:
                    break
            new_weights.append(1)
            replays[b]['cnt'] += 1
            replays[b]['delta'] = delta
        grad_norm = math.sqrt(grad_norm)
        if args.verbose:
            print 'grad norm =', grad_norm
        # push the TD-error gradients through the Q network
        agent.update([mx.nd.array(grad, ctx=devices[0]), mx.nd.zeros(agent.Q.get_outputs()[1].shape, ctx=devices[0])])
        memory.update_priorities(idxes, new_weights)
        if args.verbose:
            print 'gamma =', args.q_gamma, 'epsilon =', epsilon
        if (1+batch) % 100 == 0:
            tbs_V.print_board()
    if (e+1) % 1 == 0:
        agent.save(e+1)
319 |
--------------------------------------------------------------------------------
/RL/dqn.py:
--------------------------------------------------------------------------------
1 | import sys
2 | #sys.path.insert(0, "mxnet/python/")
3 | import find_mxnet
4 | import mxnet as mx
5 |
6 | from tensorboard import SummaryWriter
7 | import logging
8 | import numpy as np
9 | import argparse
10 | import random
11 | import math
12 | from sklearn.metrics import average_precision_score
13 |
14 | from batch_provider import BatchProvider
15 | from utils import get_imRecordIter
16 | from replay_memory import ReplayMemory
17 | from tb_system import TensorBoardSystem
18 | from agent import Agent
19 |
20 |
def parse_args():
    """Parse command-line options for DQN re-id training (PRID/iLiDS).

    Returns the argparse.Namespace holding all hyper-parameters; the
    defaults reproduce the original configuration exactly.
    """
    parser = argparse.ArgumentParser(description='multishot recog training')
    add = parser.add_argument
    add('--gpus', type=str, default='1', help='the gpus will be used, e.g "0,1"')
    add('--data-dir', type=str, default="/data3/matt/prid_2011", help='data directory')
    add('--num-examples', type=int, default=10000, help='the number of training examples')
    add('--num-train', type=int, default=20504, help='the number of training examples')
    add('--num-valid', type=int, default=19529, help='the number of training examples')
    add('--num-id', type=int, default=10, help='the number of training ids')
    add('--batch-size', type=int, default=16, help='the batch size')
    add('--sample-size', type=int, default=4, help='sample frames from each video')
    add('--patch-size', type=int, default=1, help='size of single image patch from video')
    add('--lr', type=float, default=1e-2, help='the initial learning rate')
    add('--num-epoches', type=int, default=100, help='the number of training epochs')
    add('--mode', type=str, default='prid_video_match_%d-%d' % (4, 4), help='save names of model and log')
    add('--verifi-threshd', type=float, default=0.9 + 2.3, help='verification threshold')
    add('--kv-store', type=str, default='device', help='the kvstore type')
    add('--network', type=str, default='inception-bn', help='network name')
    add('--model-load-epoch', type=int, default=1, help='load the model on an epoch using the model-load-prefix')
    add('--model-load-prefix', type=str, default='prid_baseline_b4', help='load model prefix')
    add('--q_duel', action='store_true', default=False, help='if use duel network')
    add('--q_double', action='store_true', default=False, help='if use double DQN')
    add('--q-weight', type=float, default=1.0, help='DQN loss weight')
    add('--q-gamma', type=float, default=0.99, help='DQN decay rate')
    add('--penalty', type=float, default=0.1, help='DQN unsure penalty rate')
    add('--ob-epochs', type=int, default=1, help='DQN observing epochs')
    add('--num_acts', type=int, default=3, help='number of actions')
    add('--acts_per_round', type=int, default=3, help='number of actions per round')
    add('--fix_gamma', action='store_true', default=False, help='if fix_gamma in bn')
    add('--fix_penalty', action='store_true', default=False, help='if fix penalty')
    add('--no_sim', action='store_true', default=False, help='if no sim net')
    add('--num_hidden', type=int, default=128, help='number of hidden neurons in Q learning fc layers')
    add('--target_freq', type=int, default=500, help='number of hidden neurons in Q learning fc layers')
    add('--tisr', type=int, default=1, help='time inverse lr step')
    add('--num_sim', type=int, default=128, help='number of hidden neurons in similarity network')
    add('--lr_step', type=str, default='50,75', help='number of epoches to shrink lr')
    add('--q_bn', action='store_true', default=False, help='if add bn in qnet')
    add('--maxout', action='store_true', default=False, help='if add maxout in qnet')
    add('--pr_alpha', type=float, default=0.6, help='prioritized-replay alpha')
    add('--pr_beta', type=float, default=0.4, help='prioritized-replay beta')
    add('--add_rewards', action='store_true', default=False, help='if add rewards for single agent')
    add('--epsilon', action='store_true', default=False, help='if epsilon learning')
    add('--pos_weight', type=float, default=1.0, help='positive rewards weight')
    add('--e2e', action='store_true', default=False, help='if e2e')
    add('--history', action='store_true', default=False, help='if use history')
    add('--optimizer', type=str, default='sgd', help='choose the optimizer in {sgd, adam, rms}')
    add('--memory_size', type=int, default=1000, help='memory buffer size')
    add('--final_epsilon', type=float, default=0.1, help='final epsilon for exploration')
    add('--exp_ratio', type=float, default=0.1, help='ratio for exploration in whole training process')
    add('--hinge', action='store_true', default=False, help='if use hinge loss')
    add('--train-set', type=str, default='image_valid', help='load model prefix')
    add('--valid-set', type=str, default='image_test', help='load model prefix')
    add('--min-states', type=int, default=4, help='minimum states for history')
    add('--min-imgs', type=int, default=0, help='minimum imgs for each state')
    add('--precomputed', action='store_true', default=False, help='if feature precomputed')
    add('--fusion', action='store_true', default=False, help='if use data fusion')
    add('--total-forward', action='store_true', default=False, help='if use data fusion')
    add('--verbose', action='store_true', default=False, help='if print debug info')
    add('--crossvalid', action='store_true', default=False, help='if do cross validation')
    return parser.parse_args()
138 |
139 |
# --- global experiment setup (executed at import time) ---
args = parse_args()
# Log file name follows the experiment "mode" tag.
logging.basicConfig(filename='log/%s.log' % args.mode, level=logging.DEBUG)
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
logging.info(args)
logdir = './tblog/' + args.mode
print args
batch_size = args.batch_size
num_epoch = args.num_epoches
# One mx context per requested GPU id; devices[0] hosts the agent.
devices = [mx.gpu(int(i)) for i in args.gpus.split(',')]
lr = args.lr

agent = Agent(args, devices[0])

prefix = 'models/%s' % args.mode   # checkpoint path prefix
# TensorBoard reporting is only needed when cross-validating.
if args.crossvalid:
    summary_writer = SummaryWriter(logdir)
    tbs_V = TensorBoardSystem('V', summary_writer)

# Record iterators over 224x112 RGB frames; train gets augmentation.
valid_iter = get_imRecordIter(
    args, 'recs/%s'%args.valid_set, (3, 224, 112), 1,
    shuffle=False, aug=False, even_iter=True)
train_iter = get_imRecordIter(
    args, 'recs/%s'%args.train_set, (3, 224, 112), 1,
    shuffle=False, aug=True, even_iter=True)

# Per-frame label lists matching the rec files.
valid_lst = np.loadtxt('%s/recs/%s.txt'%(args.data_dir, args.valid_set)).astype(int)
train_lst = np.loadtxt('%s/recs/%s.txt'%(args.data_dir, args.train_set)).astype(int)

valid = BatchProvider(valid_iter, valid_lst, False, args.sample_size, sample_ratio=0.5, is_valid=True, need_feat=args.history)
train = BatchProvider(train_iter, train_lst, True, args.sample_size, sample_ratio=0.5, need_feat=args.history)
N = args.num_id   # number of distinct identities

# Evaluation accumulators: CMC hit lists at ranks cmcn, AP list, scores, turns.
cmcs, ap, cmcn, vscores, vturns = [[], [], [], []], [], [1, 5, 10, 20], [], []

iterations = args.num_examples   # episodes per epoch
memory = ReplayMemory(replay_size=args.memory_size, alpha=args.pr_alpha)
# Epsilon-greedy schedule: decay linearly from 1.0 to final_epsilon over the
# first exp_ratio fraction of the epochs, one decrement per episode.
epsilon = 1.0
final_epsilon = args.final_epsilon
rand_ep, fix_ep = 0, int(args.num_epoches * args.exp_ratio)
epsilon_shr = (epsilon - final_epsilon) / (fix_ep - rand_ep) / iterations

max_penalty = 1   # penalty magnitude for wrong/overrun decisions

# NOTE(review): this figure-log handle is opened at import and never closed.
frf = open(('figurelog/%s' % args.mode), 'w')
185 |
186 | for e in xrange(args.num_epoches):
187 | if args.verbose:
188 | print 'Epoch', e
189 | for batch in xrange(iterations):
190 | if args.verbose:
191 | print 'Epoch', e, 'batch', batch
192 | cur, a, b, cur_id = train.provide()
193 | y = ((a %N) == (b % N))
194 | data_batch = agent.wash_data(cur)
195 | Qvalue = agent.get_Qvalue(data_batch, is_train=False)
196 | if args.verbose:
197 | print 'forward', Qvalue
198 | qs = agent.Q.get_outputs()[1].asnumpy()
199 | print qs
200 | print qs.max(), qs.min(), qs.mean(), qs.std()
201 | Qvalue_softmax = mx.nd.SoftmaxActivation(mx.nd.array(Qvalue, ctx=devices[0]) / epsilon / 5).asnumpy()
202 | reward, action, i = [0 for _ in xrange(args.min_imgs)], [-1 for _ in xrange(args.min_imgs)], args.min_imgs
203 | while i < args.sample_size:
204 | if args.total_forward:
205 | if i + 1 < args.sample_size:
206 | k = 2
207 | else:
208 | Q_choice = np.argmax(Qvalue[i, :2]) if args.epsilon else np.random.choice(args.num_acts, 1, p=Qvalue_softmax[i, :2])[0]
209 | if random.random() <= epsilon and args.epsilon:
210 | k = random.randrange(2)
211 | else:
212 | k = Q_choice
213 | else:
214 | Q_choice = np.argmax(Qvalue[i]) if args.epsilon else np.random.choice(args.num_acts, 1, p=Qvalue_softmax[i])[0]
215 | if random.random() <= epsilon and args.epsilon:
216 | k = random.randrange(args.num_acts)
217 | else:
218 | k = Q_choice
219 | cls = k % args.acts_per_round
220 | step = k - 1
221 | if cls >= 2:
222 | if i + step >= args.sample_size:
223 | r = -max_penalty
224 | terminal = True
225 | else:
226 | #r = -args.penalty * step #- step_penalty_rate / (args.sample_size - t[i])
227 | r = -args.penalty * (2.0 - (0.5 ** (step - 1)))
228 | #if not args.fix_penalty else 1.0 - 0.5 * cnt
229 | terminal = False
230 | else:
231 | r = 1 if cls == y else -max_penalty #(-10 if y else -10)
232 | terminal = True
233 | if args.pos_weight > 1:
234 | if y:
235 | r *= args.pos_weight
236 | else:
237 | if not y:
238 | r /= args.pos_weight
239 | reward.append(r)
240 | action.append(k)
241 | if args.verbose:
242 | print i, (a, b), Qvalue[i], k, (y, cls), r
243 | if terminal:
244 | break
245 | i += step
246 | #memory.add(dict(cur = cur_id, reward=reward, action=action, y=y, cnt=1))
247 | memory.add(dict(cur = cur, reward=reward, action=action, y=y, cnt=1))
248 | if rand_ep <= e < fix_ep:
249 | epsilon -= epsilon_shr
250 | epsilon = max(epsilon, final_epsilon)
251 | if e * args.num_examples + batch < 50:#args.num_examples / 2:
252 | continue
253 |
254 | replays, idxes, weights = memory.sample(args.batch_size, args.pr_beta)
255 | new_weights = []
256 | for b in xrange(args.batch_size):
257 | cur, reward, action, y = replays[b]['cur'], replays[b]['reward'], replays[b]['action'], replays[b]['y']
258 | data_batch, delta_sum = agent.wash_data(cur), 0
259 | Qvalue = agent.get_Qvalue(data_batch, is_train=True)
260 | grad, r, grad_norm = np.zeros((args.sample_size, args.num_acts)), 0, 0  # gradient buffer over (timestep, action head), plus running reward / squared-norm accumulators
261 | t = args.min_imgs  # frame index reached at the episode's last step: min_imgs plus every extra frame requested
262 | for i in xrange(len(action) - 1):  # each action value encodes a step of (action - 1) extra frames
263 | t += action[i] - 1
264 | for i in xrange(len(action) - 1, -1, -1):  # walk the episode backwards, one-step Q backup per transition
265 | if i < len(action) - 1:
266 | r = reward[i] + args.q_gamma * max(last_Q)#min(1.0, max(last_Q))  -- non-terminal: bootstrap from the next step's max Q
267 | else:
268 | r = reward[i]  # terminal transition: raw reward only
269 | #last_Q_, last_Q = Qvalue_[t], Qvalue[t]
270 | last_Q = Qvalue[t]  # remember this step's Q-row; it becomes the "next" row for the earlier step handled next iteration
271 | if args.verbose:
272 | print i, t, action[i], y, Qvalue[t], r,
273 | delta = -r + Qvalue[t, action[i]]  # TD error for the taken action (gradient of 0.5*(Q - r)^2 w.r.t. Q)
274 | if not args.total_forward:
275 | delta /= len(action)  # average the per-step gradient over the episode length
276 | if abs(delta) > 1:
277 | delta /= abs(delta)  # clip the TD error to [-1, 1]
278 | if args.hinge:
279 | if (y and action == 1 or not y and action == 0) and delta > 0:  # NOTE(review): 'action' is the whole list here, so 'action == 1' is always False — likely meant action[i]; as written the hinge never zeroes delta
280 | clipped_delta = 0
281 | elif (y and action == 0 or not y and action == 1) and delta < 0:  # NOTE(review): same suspected action[i] bug as above
282 | clipped_delta = 0
283 | else:
284 | clipped_delta = delta
285 | else:
286 | clipped_delta = delta
287 | #if abs(clipped_delta) > 1:
288 | # clipped_delta /= abs(clipped_delta)
289 | grad[t, action[i]] = clipped_delta  # only the taken action's Q output receives a gradient
290 | grad_norm += (clipped_delta) * (clipped_delta)
291 | if args.verbose:
292 | print delta, grad[i]  # NOTE(review): the update above wrote grad[t], not grad[i] — this likely prints a stale row
293 | delta_sum += abs(delta)
294 | if i > 0:
295 | t -= (action[i - 1] - 1)  # rewind t to the frame index of the previous transition
296 | if args.total_forward:
297 | break  # total_forward mode: back up only the final (terminal) transition
298 | new_weights.append(1)  # reset this replay's priority weight for the prioritized buffer
299 | replays[b]['cnt'] += 1
300 | replays[b]['delta'] = delta
301 | grad_norm = math.sqrt(grad_norm)
302 | if args.verbose:
303 | print 'grad norm =', grad_norm
304 | agent.update([mx.nd.array(grad, ctx=devices[0]), mx.nd.zeros(agent.Q.get_outputs()[1].shape, ctx=devices[0])])  # backprop the hand-built gradient; the network's second output gets a zero gradient
305 | memory.update_priorities(idxes, new_weights)
306 | if args.verbose:
307 | print 'gamma =', args.q_gamma, 'epsilon =', epsilon
308 | #args.q_gamma = min(0.99, 0.99 * args.q_gamma + 0.01)
309 |
310 | if (e+1) % 1 == 0:  # checkpoint every epoch ("% 1" is always true; the modulus is kept as a visible knob)
311 | agent.save(e+1)
312 |
313 | if not args.crossvalid:
314 | continue  # skip the validation pass below unless cross-validation is enabled
315 |
316 | valid.reset()
317 | batch, valid_cnt, vv, vpool = 0, 0, np.zeros((N*2, N)), set()  # vv: per (probe, gallery) score matrix; vpool: pairs already judged
318 | vs, vt = [0 for i in xrange(N+N)], [0 for i in xrange(N+N)]  # accumulated reward and turn counts, one slot per probe-side and gallery-side row
319 | fts = [[0 for _2 in xrange(N)] for _1 in xrange(N)]  # final (action code, turn) per pair, consumed by the count map below
320 | while valid_cnt < N*N:  # keep sampling until every probe/gallery pair has a terminal verdict
321 | if args.verbose:
322 | print 'Epoch', e, 'valid', batch, 'vc', valid_cnt
323 | batch += 1
324 | cur, a, b = valid.provide()
325 | y = ((a %N) == (b % N))  # ground truth: same identity iff indices match mod N
326 | data_batch = agent.wash_data(cur)
327 | Qvalue = agent.get_Qvalue(data_batch, is_train=False)
328 | if args.verbose:
329 | print Qvalue
330 | print agent.Q.get_outputs()[1].asnumpy().max(), agent.Q.get_outputs()[1].asnumpy().min()
331 | i = 0
332 | while i < args.sample_size:  # greedy (no-exploration) rollout over the frame budget
333 | if args.total_forward:
334 | if i + 1 < args.sample_size:
335 | k = 2  # total_forward: always "request more frames" until the last step...
336 | else:
337 | k = np.argmax(Qvalue[i, :2])  # ...then force a same/different verdict
338 | else:
339 | k = np.argmax(Qvalue[i])  # greedy action over all heads
340 | cls = k % args.acts_per_round  # action class: 0/1 = verdict, >=2 = ask for more frames
341 | step = k - 1  # number of extra frames this action consumes
342 | if cls >= 2:
343 | if i + step >= args.sample_size:
344 | r = -max_penalty  # ran past the frame budget
345 | terminal = True
346 | else:
347 | r = -args.penalty * (2.0 - (0.5 ** (step - 1)))  # small cost for requesting frames, growing with step size
348 | terminal = False
349 | else:
350 | r = 1 if cls == y else -max_penalty #(-10 if y else -10)  -- verdict reward: +1 when correct, heavy penalty otherwise
351 | terminal = True
352 | if args.pos_weight > 1:  # optional re-weighting toward the rarer positive (same-identity) pairs
353 | if y:
354 | r *= args.pos_weight
355 | else:
356 | if not y:
357 | r /= args.pos_weight
358 | if args.verbose:
359 | print 'valid', i, (a, b), Qvalue[i], k, (y, cls), r
360 | va, vb = a, b % N
361 | if (va, vb) not in vpool:  # only the first rollout of each pair contributes to the score tallies
362 | vs[va] += r
363 | vs[vb+N] += r
364 | if terminal:
365 | if (va, vb) not in vpool:#vv[va][vb] == 0:
366 | fts[va][vb] = (k + (3 if va == vb else 0), i)  # record the final action (+3 marks the true-match diagonal) and the turn it ended on
367 | vpool.add((va, vb))
368 | valid_cnt += 1
369 | vv[va][vb] = Qvalue[i][0] - Qvalue[i][1]  # margin between the two verdict heads, used as the match score below
370 | vt[va] += i + 1
371 | vv[vb+N][va] += vv[va][vb]  # mirror the score into the gallery-side row
372 | vt[vb+N] += i + 1
373 | if args.verbose:
374 | print va, vb, vv[va][vb], r
375 | tbsQvalue = np.zeros(3)
376 | tbsQvalue[min(2, cls)] = Qvalue[i, k]  # collapse all "request more" classes onto slot 2 for the board
377 | tbs_V.put_board(tbsQvalue, min(2, cls), y, r, epsilon, i + 1, dummy=False)  # tensorboard-style stats for validation rollouts
378 | if terminal:
379 | break
380 | i += step
381 | if valid_cnt % 100 == 0:
382 | tbs_V.print_board()
383 | for i in xrange(N*2):  # retrieval metrics per row (both probe->gallery and gallery->probe directions)
384 | a, r = i % N, 0  # a: index of the true match in this row; r: rank of the true match
385 | for b in xrange(N):
386 | if a != b and vv[i][b] <= vv[i][a]:  # NOTE(review): lower vv is treated as "more similar" here (the AP score below is -vv) — confirm which verdict head means "same"
387 | r += 1
388 | for k in xrange(4):
389 | cmcs[k].append(1.0 if r < cmcn[k] else 0.0)  # CMC hit at rank threshold cmcn[k]
390 | if len(cmcs[k]) >= N*4:
391 | summary_writer.add_scalar(args.mode[0] + ('_CMC%d'%cmcn[k]), sum(cmcs[k][-N*2:]) / (N*2), len(cmcs[k]) - N*2)  # sliding-window CMC over the most recent 2N rows
392 | vscores += [vs[i]]
393 | vturns += [vt[i]]
394 | score = np.array([-vv[i][_] for _ in xrange(N)])  # negate so that a higher score means "more similar" for average precision
395 | label = np.array([(1 if _ == a else 0) for _ in xrange(N)])  # one-hot ground truth: only the true match is positive
396 | ap.append(average_precision_score(label, score))
397 | if args.verbose:
398 | print 'ap', i, ap[-1]
399 | if len(ap) >= N*4:
400 | summary_writer.add_scalar(args.mode[0] + '_MAP', sum(ap[-N*2:]) / (N*2), len(ap) - N*2)  # sliding-window mAP, same window as CMC
401 | if len(ap) >= N*4:
402 | summary_writer.add_scalar(args.mode[0] + ('_scores'), sum(vscores[-N*2:]) / (N*2), len(vscores) - N*2)
403 | summary_writer.add_scalar(args.mode[0] + ('_turns'), sum(vturns[-N*2:]) / (N*2), len(vturns) - N*2)  # average number of turns (frames requested) per decision
404 | frf.write('%d %.3f %.3f %.3f %.3f %.3f %.3f\n'%(e, sum(cmcs[0][-N*2:]) / (N*2), sum(cmcs[1][-N*2:]) / (N*2), sum(cmcs[2][-N*2:]) / (N*2), sum(cmcs[3][-N*2:]) / (N*2), sum(ap[-N*2:]) / (N*2), sum(vturns[-N*2:])*1.0 / (N*N*2)))  # one summary row per epoch: 4 CMC points, mAP, mean turns
405 | cnt_map = [[0 for j in xrange(6)] for i in xrange(args.sample_size)]  # histogram: turn index x final-action code (6 codes incl. the +3 true-match offset)
406 | for i in xrange(N):
407 | for j in xrange(N):
408 | cnt_map[fts[i][j][1]][fts[i][j][0]] += 1
409 | for i in xrange(args.sample_size):
410 | for j in xrange(6):
411 | frf.write(str(cnt_map[i][j]) + ' ')
412 | frf.write('\n')
413 | frf.flush()  # keep the results file current after every epoch
414 |
415 | frf.close()
416 |
--------------------------------------------------------------------------------